src/trusted/validator_ragel/unreviewed/decoder_test.c - Issue 11000033: Move validator_x86_XX.rl out of unreviewed.

Unified Diff: src/trusted/validator_ragel/unreviewed/decoder_test.c

Issue 11000033: Move validator_x86_XX.rl out of unreviewed. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/

Patch Set: Created 8 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« src/trusted/validator_ragel/decoder_x86_64.rl ('K') | « src/trusted/validator_ragel/unreviewed/decoder.h ('k') | src/trusted/validator_ragel/unreviewed/decoder_x86_32.rl » ('j') | src/trusted/validator_ragel/validator_internal.h » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: src/trusted/validator_ragel/unreviewed/decoder_test.c

===================================================================

--- src/trusted/validator_ragel/unreviewed/decoder_test.c (revision 9996)

+++ src/trusted/validator_ragel/unreviewed/decoder_test.c (working copy)

@@ -14,7 +14,7 @@

#include "native_client/src/include/elf64.h"

#include "native_client/src/shared/platform/nacl_check.h"

#include "native_client/src/shared/utils/types.h"

-#include "native_client/src/trusted/validator_ragel/unreviewed/decoder.h"

+#include "native_client/src/trusted/validator_ragel/decoder.h"

/* This is a copy of NaClLog from shared/platform/nacl_log.c to avoid

* linking in code in NaCl shared code in the unreviewed/Makefile and be able to

@@ -150,128 +150,155 @@

}

-void ProcessInstruction(const uint8_t *begin, const uint8_t *end,

- struct Instruction *instruction, void *userdata) {

- const char *instruction_name = instruction->name;

- unsigned char operands_count = instruction->operands_count;

- unsigned char rex_prefix = instruction->prefix.rex;

- enum OperandName rm_index = instruction->rm.index;

- enum OperandName rm_base = instruction->rm.base;

- Bool data16_prefix = instruction->prefix.data16;

- const uint8_t *p;

- char delimeter = ' ';

- Bool print_rip = FALSE;

- Bool empty_rex_prefix_ok = FALSE;

- Bool spurious_rex_prefix = FALSE;

-#define print_name(x) (printf((x)), shown_name += strlen((x)))

- size_t shown_name = 0;

- int i, operand_type;

- /*

- * "fwait" is nasty: few of them will be included in other X87 instructions

- * ("fclex", "finit", "fstcw", "fstsw", "fsave" have two names, other

- * instructions are unchanged) - but if after them we see regular instruction

- * then we must print them all. This convoluted logic is not needed when we

- * don't print anything so decoder does not include it.

- */

- if (((end == begin + 1) && (begin[0] == 0x9b)) ||

- ((end == begin + 2) &&

- ((begin[0] & 0xf0) == 0x40) && (begin[1] == 0x9b))) {

- if (!(((struct DecodeState *)userdata)->fwait)) {

- ((struct DecodeState *)userdata)->fwait = begin;

+/*

+ * "fwait" is nasty: few of them will be included in other X87 instructions

+ * ("fclex", "finit", "fstcw", "fstsw", "fsave" have two names, other

+ * instructions are unchanged) - but if after them we see regular instruction

+ * then we must print them all. This convoluted logic is not needed when we

+ * don't print anything so decoder does not include it.

+ */

+static Bool ProcessFWait(const uint8_t **begin, const uint8_t *end,

Dmitry Polukhin 2012/11/02 17:31:06 Too many arguments, class with state should solve

+ struct Instruction *instruction,

+ struct DecodeState *userdata,

+ unsigned char *rex_prefix, Bool *spurious_rex_prefix,

+ enum OperandName *rm_base,

+ enum OperandName *rm_index) {

+ /* Instruction is "fwait" if it's 0x9b or if it's REX prefix plus 0x9b. */

+ if (((end == *begin + 1) && ((*begin)[0] == 0x9b)) ||

+ ((end == *begin + 2) &&

+ (((*begin)[0] & 0xf0) == 0x40) && ((*begin)[1] == 0x9b))) {

+ if (!(userdata->fwait)) {

+ userdata->fwait = *begin;

}

- return;

- } else if (((struct DecodeState *)userdata)->fwait) {

- /* If it's x87 instruction then we can fold some fwait's in the instruction

- itself. */

- if (((begin[0] >= 0xd8) && (begin[0] <= 0xdf)) ||

- ((((begin[0] & 0xf0) == 0x40) || (begin[0] == 0x66)) &&

- (begin[1] >= 0xd8) && (begin[1] <= 0xdf)) ||

- ((begin[0] == 0x66) || ((begin[1] & 0xf0) == 0x40) ||

- (begin[2] >= 0xd8) || (begin[2] <= 0xdf))) {

- /* fwait "prefix" can only include two 0x9b bytes or one rex byte - and

- * then only if the instruction itself have no rex prefix. */

- int fwait_count = !!data16_prefix;

- int rex_count = (!!rex_prefix) | (!!data16_prefix);

+ return TRUE;

+ } else if (userdata->fwait) {

+ /*

+ * If it's x87 instruction then we can fold some fwait's in the instruction

+ * itself.

+ *

+ * Instruction is x87 instruction if it has opcode from 0xd8 to 0xdf, but it

+ * can include either 0x66 prefix or REX prefix, or both.

+ */

+ if ((((*begin)[0] & 0xf8) == 0xd8) ||

+ (((((*begin)[0] & 0xf0) == 0x40) || ((*begin)[0] == 0x66)) &&

+ (((*begin)[1] & 0xf8) == 0xd8)) ||

+ (((*begin)[0] == 0x66) && (((*begin)[1] & 0xf0) == 0x40) &&

+ (((*begin)[2] & 0xf8) == 0xd8))) {

+ /*

+ * fwait "prefix" can only include two 0x9b bytes or one rex byte - and

+ * then only if the instruction itself has no rex prefix.

+ */

+ int fwait_count = !!(instruction->prefix.data16);

+ int rex_count = (!!*rex_prefix) | (!!(instruction->prefix.data16));

for (;;) {

- if (begin == ((struct DecodeState *)userdata)->fwait)

+ if (*begin == userdata->fwait)

break;

- if ((begin[-1]) == 0x9b) {

+ if (((*begin)[-1]) == 0x9b) {

if (fwait_count < 2) {

- --begin;

+ --*begin;

++fwait_count;

- if ((begin[1] & 0xf0) == 0x40)

+ if (((*begin)[1] & 0xf0) == 0x40)

break;

} else {

break;

}

- } else if ((begin[-1] & 0xf0) == 0x40) {

+ } else if (((*begin)[-1] & 0xf0) == 0x40) {

if (rex_count >= 1)

break;

- --begin;

+ --*begin;

++rex_count;

- if (!rex_prefix) {

- rex_prefix = *begin;

- /* Bug-to-bug compatibility, fun... */

- if ((rex_prefix & 0x01) && (rm_base <= REG_RDI)) {

- if (operands_count == 1 &&

+ if (!*rex_prefix) {

+ *rex_prefix = **begin;

+ /*

+ * Bug-to-bug compatibility: objdump will erroneously use bits from

+ * first REX prefix (attached to "fwait") and not from the

+ * second REX prefix (attached to the instruction itself).

+ *

+ * Duplicate this error here until it is fixed in objdump.

+ */

+ if ((*rex_prefix & 0x01) && (*rm_base <= REG_RDI)) {

+ if (instruction->operands_count == 1 &&

instruction->operands[0].name == REG_RM)

- rm_base |= REG_R8;

+ *rm_base |= REG_R8;

else

- spurious_rex_prefix = TRUE;

+ *spurious_rex_prefix = TRUE;

}

- if (rex_prefix & 0x02) {

- if (operands_count == 1 &&

+ if (*rex_prefix & 0x02) {

+ if (instruction->operands_count == 1 &&

instruction->operands[0].name == REG_RM) {

- if (rm_index <= REG_RDI)

- rm_index |= REG_R8;

- else if (rm_index == REG_RIZ)

- rm_index = REG_R12;

- else if (rm_index == NO_REG)

- spurious_rex_prefix = TRUE;

+ if (*rm_index <= REG_RDI)

+ *rm_index |= REG_R8;

+ else if (*rm_index == REG_RIZ)

+ *rm_index = REG_R12;

+ else if (*rm_index == NO_REG)

+ *spurious_rex_prefix = TRUE;

} else {

- spurious_rex_prefix = TRUE;

+ *spurious_rex_prefix = TRUE;

}

- if (rex_prefix & 0x0c)

- spurious_rex_prefix = TRUE;

+ if (*rex_prefix & 0x0c)

+ *spurious_rex_prefix = TRUE;

}

- if (begin != ((struct DecodeState *)userdata)->fwait) {

- while ((((struct DecodeState *)userdata)->fwait) < begin) {

+ if (*begin != userdata->fwait) {

+ while (userdata->fwait < *begin) {

printf("%*lx:\t%02x \tfwait\n",

- ((struct DecodeState *)userdata)->width,

- (long)((((struct DecodeState *)userdata)->fwait) -

- (((struct DecodeState *)userdata)->offset)),

- *((struct DecodeState *)userdata)->fwait);

- ++(((struct DecodeState *)userdata)->fwait);

+ userdata->width, (long)(userdata->fwait - userdata->offset),

+ *userdata->fwait);

+ ++(userdata->fwait);

}

} else {

- while ((((struct DecodeState *)userdata)->fwait) < begin) {

+ while ((userdata->fwait) < *begin) {

printf("%*lx:\t%02x \tfwait\n",

- ((struct DecodeState *)userdata)->width,

- (long)((((struct DecodeState *)userdata)->fwait) -

- (((struct DecodeState *)userdata)->offset)),

- *((struct DecodeState *)userdata)->fwait);

- ++(((struct DecodeState *)userdata)->fwait);

+ userdata->width, (long)(userdata->fwait - userdata->offset),

+ *userdata->fwait);

+ ++(userdata->fwait);

}

- ((struct DecodeState *)userdata)->fwait = FALSE;

+ userdata->fwait = FALSE;

}

+ return FALSE;

+void ProcessInstruction(const uint8_t *begin, const uint8_t *end,

Dmitry Polukhin 2012/11/02 17:31:06 I think it should be converted to so class Instruc

+ struct Instruction *instruction, void *callback_data) {

+ struct DecodeState *userdata = callback_data;

+ const char *instruction_name = instruction->name;

+ unsigned char operands_count = instruction->operands_count;

+ unsigned char rex_prefix = instruction->prefix.rex;

+ enum OperandName rm_index = instruction->rm.index;

+ enum OperandName rm_base = instruction->rm.base;

+ Bool data16_prefix = instruction->prefix.data16;

+ const uint8_t *p;

+ char delimeter = ' ';

+ Bool print_rip = FALSE;

+ Bool empty_rex_prefix_ok = FALSE;

+ Bool spurious_rex_prefix = FALSE;

+#define print_name(x) (printf((x)), shown_name += strlen((x)))

Brad Chen 2012/10/22 21:29:05 What is your goal in making this a macro rather th

+ size_t shown_name = 0;

+ int i, operand_type;

+ if (ProcessFWait(&begin, end, instruction, callback_data,

+ &rex_prefix, &spurious_rex_prefix, &rm_base, &rm_index))

+ return;

+ /*

+ * Objdump will print data16 (0x66) prefix on a separate line for "fbld",

+ * "fbstp", "fild", "fistp", "fld", and "fstp" instructions.

+ */

if ((data16_prefix) && (begin[0] == 0x66) && (!(rex_prefix & 0x08)) &&

(IsNameInList(instruction_name,

"fbld", "fbstp", "fild", "fistp", "fld", "fstp", NULL))) {

printf("%*lx:\t66 \tdata16\n",

- ((struct DecodeState *)userdata)->width,

- (long)(begin - (((struct DecodeState *)userdata)->offset)));

+ userdata->width, (long)(begin - userdata->offset));

data16_prefix = FALSE;

++begin;

}

- printf("%*lx:\t", ((struct DecodeState *)userdata)->width,

- (long)(begin - (((struct DecodeState *)userdata)->offset)));

+ /* Start the main processing part: print offset here. */

+ printf("%*lx:\t", userdata->width, (long)(begin - userdata->offset));

for (p = begin; p < begin + 7; ++p) {

if (p >= end)

printf(" ");

@@ -279,6 +306,7 @@

printf("%02x ", *p);

}

printf("\t");

* "pclmulqdq" has two-operand mnemonic names for "imm8" equal to 0x00, 0x01,

* 0x10, and 0x11. Objdump incorrectly mixes them up with 0x2 and 0x03.

@@ -292,6 +320,7 @@

--operands_count;

}

* "vpclmulqdq" has two-operand mnemonic names for "imm8" equal to 0x00, 0x01,

* 0x10, and 0x11. Objdump mixes them with 0x2 and 0x03.

@@ -305,14 +334,20 @@

--operands_count;

}

spurious_rex_prefix |=

rex_prefix &&

(instruction->prefix.rex_b_spurious ||

instruction->prefix.rex_x_spurious ||

instruction->prefix.rex_r_spurious ||

instruction->prefix.rex_w_spurious);

+ /*

+ * Objdump sometimes does not show spurious rex prefixes. Adjust the

+ * spurious_rex_prefix variable here for these cases.

+ */

if (operands_count > 0) {

- if (!((struct DecodeState *)userdata)->ia32_mode)

+ if (!userdata->ia32_mode)

for (i=0; i<operands_count; ++i)

* Objdump mistakenly allows "lock" with "mov %crX,%rXX" only in ia32

@@ -333,13 +368,7 @@

}

- /* Only few rare instructions show spurious REX.B in objdump. */

- if (!spurious_rex_prefix && instruction->prefix.rex_b_spurious)

- if (IsNameInList(instruction_name,

- "ja", "jae", "jbe", "jb", "je", "jg", "jge", "jle",

- "jl", "jne", "jno", "jnp", "jns", "jo", "jp", "js",

- "jecxz", "jrcxz", "loop", "loope", "loopne", NULL))

- spurious_rex_prefix = TRUE;

/* Some instructions don't show spurious REX.B in objdump. */

if (spurious_rex_prefix &&

instruction->prefix.rex_b_spurious &&

@@ -354,6 +383,7 @@

break;

}

/* Some instructions don't show spurious REX.W in objdump. */

if (spurious_rex_prefix &&

!instruction->prefix.rex_b_spurious &&

@@ -394,6 +424,12 @@

"popf", "push", "pushf", NULL)))

spurious_rex_prefix = TRUE;

+ /*

+ * Print prefixes. For the case where two prefixes are present we must print

+ * them in a correct order. First print prefixes from begin[0], then handle

+ * prefixes from begin[1] (at this point we don't support more than two

+ * prefixes).

+ */

if (instruction->prefix.lock && (begin[0] == 0xf0))

print_name("lock ");

if (instruction->prefix.repnz && (begin[0] == 0xf2))

@@ -409,7 +445,6 @@

else

print_name("repz ");

}

if (((data16_prefix) && (rex_prefix & 0x08)) &&

!IsNameInList(instruction_name,

"bsf", "bsr", "fldenvs", "fnstenvs", "fnsaves", "frstors",

@@ -418,7 +453,6 @@

(begin[0] != 0x66) || ((begin[1] & 0x48) != 0x48) || (begin[2] != 0x90))

print_name("data32 ");

}

if (instruction->prefix.lock && (begin[0] != 0xf0)) {

print_name("lock ");

}

@@ -437,6 +471,11 @@

print_name("repz ");

}

+ /*

+ * REX prefix. Empty REX prefix (without REX.B, REX.X, REX.R, or REX.W bits)

+ * is not always spurious: it can be used to select between 8-bit registers

+ * (%ah vs %spl, %ch vs %bpl, %dh vs %dil, and %bh vs %sil).

+ */

if (rex_prefix == 0x40) {

if (operands_count > 0)

for (i=0; i<operands_count; ++i)

@@ -449,6 +488,9 @@

}

if (!empty_rex_prefix_ok)

print_name("rex ");

+ /*

+ * Non-empty REX prefix is shown if and only if if it's spurious.

Brad Chen 2012/10/22 21:29:05 if if

+ */

} else if (spurious_rex_prefix) {

print_name("rex.");

if (rex_prefix & 0x08) {

@@ -466,10 +508,13 @@

print_name(" ");

}

+ /* Prefixes are printed. Print the instruction name. */

printf("%s", instruction_name);

shown_name += strlen(instruction_name);

+ /* In some cases AT&T instruction uses suffix to show the size of operand. */

Brad Chen 2012/10/22 21:29:05 It looks to me like it may be the case that the st

if (instruction->att_instruction_suffix) {

+ /* But special versions of "nop" never use these suffixes. */

if (!IsNameInList(instruction_name,

"nopw 0x0(%eax,%eax,1)",

"nopw 0x0(%rax,%rax,1)",

@@ -493,11 +538,17 @@

print_name("q");

}

+ /* Regular "mov" with 64-bit immediate is printed as "movabs" by objdump. */

if (strcmp(instruction_name, "mov") == 0 &&

instruction->operands[1].name == REG_IMM &&

instruction->operands[1].type == OPERAND_TYPE_64_BIT)

print_name("abs");

+ /*

+ * Jump instructions can use branch-prediction prefixes. They are shown as

+ * suffixes by objdump.

+ */

if (IsNameInList(instruction_name,

"ja", "jae", "jbe", "jb", "je", "jg", "jge", "jle",

"jl", "jne", "jno", "jnp", "jns", "jo", "jp", "js",

@@ -507,8 +558,9 @@

else if (instruction->prefix.branch_taken)

print_name(",pt");

}

+#undef print_name

-#undef print_name

+ /* Objdump does not print spaces after some "special" instructions. */

if ((strcmp(instruction_name, "nop") != 0 || operands_count != 0) &&

!IsNameInList(

instruction_name,

@@ -530,6 +582,10 @@

"data32 data32 data32 data32 data32 nopw %cs:0x0(%eax,%eax,1)",

"data32 data32 data32 data32 data32 nopw %cs:0x0(%rax,%rax,1)",

NULL)) {

+ /*

+ * If this is a "regular" instruction with a short name then its operands are

+ * aligned.

+ */

while (shown_name < 6) {

printf(" ");

++shown_name;

@@ -537,8 +593,11 @@

if (operands_count == 0)

printf(" ");

}

+ /* Print instruction operands. */

for (i=operands_count-1; i>=0; --i) {

printf("%c", delimeter);

+ /* If the instruction is "call" or "jmp" then we need to print an asterisk. */

if (IsNameInList(instruction_name, "call", "jmp", "lcall", "ljmp", NULL) &&

instruction->operands[i].name != JMP_TO)

printf("*");

@@ -558,6 +617,8 @@

} else {

operand_type = instruction->operands[i].type;

}

+ /* All the special cases are handled, let's print the normal operands! */

switch (instruction->operands[i].name) {

case REG_RAX:

case REG_RCX:

@@ -590,7 +651,7 @@

else

printf("-0x%"NACL_PRIx64, -instruction->rm.offset);

}

- if (((struct DecodeState *)userdata)->ia32_mode) {

+ if (userdata->ia32_mode) {

if ((rm_base != NO_REG) ||

(rm_index != NO_REG) ||

(instruction->rm.scale != 0))

@@ -649,19 +710,19 @@

printf("(%%dx)");

break;

case REG_DS_RBX:

- if (((struct DecodeState *)userdata)->ia32_mode)

+ if (userdata->ia32_mode)

printf("%%ds:(%%ebx)");

else

printf("%%ds:(%%rbx)");

break;

case REG_ES_RDI:

- if (((struct DecodeState *)userdata)->ia32_mode)

+ if (userdata->ia32_mode)

printf("%%es:(%%edi)");

else

printf("%%es:(%%rdi)");

break;

case REG_DS_RSI:

- if (((struct DecodeState *)userdata)->ia32_mode)

+ if (userdata->ia32_mode)

printf("%%ds:(%%esi)");

else

printf("%%ds:(%%rsi)");

@@ -669,10 +730,10 @@

case JMP_TO:

if (instruction->operands[0].type == OPERAND_TYPE_16_BIT)

printf("0x%lx", (long)((end + instruction->rm.offset -

- (((struct DecodeState *)userdata)->offset)) & 0xffff));

+ (userdata->offset)) & 0xffff));

else

printf("0x%lx", (long)(end + instruction->rm.offset -

- (((struct DecodeState *)userdata)->offset)));

+ (userdata->offset)));

break;

case REG_RIP:

case REG_RIZ:

@@ -681,16 +742,23 @@

}

delimeter = ',';

}

+ /*

+ * If %rip was used then objdump will show the actual address as the comment.

+ */

if (print_rip) {

printf(" # 0x%8"NACL_PRIx64,

(uint64_t) (end + instruction->rm.offset -

- (((struct DecodeState *)userdata)->offset)));

+ (userdata->offset)));

}

+ /* First line of instruction decoding is printed. Finish the line. */

printf("\n");

begin += 7;

+ /* If there are more than seven bytes we need to print the rest. */

while (begin < end) {

- printf("%*"NACL_PRIx64":\t", ((struct DecodeState *)userdata)->width,

- (uint64_t) (begin - (((struct DecodeState *)userdata)->offset)));

+ printf("%*"NACL_PRIx64":\t", userdata->width,

+ (uint64_t) (begin - (userdata->offset)));

for (p = begin; p < begin + 7; ++p) {

if (p >= end) {

printf("\n");