Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(177)

Unified Diff: src/trusted/validator_ragel/unreviewed/decoder_test.c

Issue 11000033: Move validator_x86_XX.rl out of unreviewed. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/
Patch Set: Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/trusted/validator_ragel/unreviewed/decoder_test.c
===================================================================
--- src/trusted/validator_ragel/unreviewed/decoder_test.c (revision 9996)
+++ src/trusted/validator_ragel/unreviewed/decoder_test.c (working copy)
@@ -14,7 +14,7 @@
#include "native_client/src/include/elf64.h"
#include "native_client/src/shared/platform/nacl_check.h"
#include "native_client/src/shared/utils/types.h"
-#include "native_client/src/trusted/validator_ragel/unreviewed/decoder.h"
+#include "native_client/src/trusted/validator_ragel/decoder.h"
/* This is a copy of NaClLog from shared/platform/nacl_log.c to avoid
* linking in code in NaCl shared code in the unreviewed/Makefile and be able to
@@ -150,128 +150,155 @@
}
}
-void ProcessInstruction(const uint8_t *begin, const uint8_t *end,
- struct Instruction *instruction, void *userdata) {
- const char *instruction_name = instruction->name;
- unsigned char operands_count = instruction->operands_count;
- unsigned char rex_prefix = instruction->prefix.rex;
- enum OperandName rm_index = instruction->rm.index;
- enum OperandName rm_base = instruction->rm.base;
- Bool data16_prefix = instruction->prefix.data16;
- const uint8_t *p;
- char delimeter = ' ';
- Bool print_rip = FALSE;
- Bool empty_rex_prefix_ok = FALSE;
- Bool spurious_rex_prefix = FALSE;
-#define print_name(x) (printf((x)), shown_name += strlen((x)))
- size_t shown_name = 0;
- int i, operand_type;
-
- /*
- * "fwait" is nasty: few of them will be included in other X87 instructions
- * ("fclex", "finit", "fstcw", "fstsw", "fsave" have two names, other
- * instructions are unchanged) - but if after them we see regular instruction
- * then we must print them all. This convoluted logic is not needed when we
- * don't print anything so decoder does not include it.
- */
- if (((end == begin + 1) && (begin[0] == 0x9b)) ||
- ((end == begin + 2) &&
- ((begin[0] & 0xf0) == 0x40) && (begin[1] == 0x9b))) {
- if (!(((struct DecodeState *)userdata)->fwait)) {
- ((struct DecodeState *)userdata)->fwait = begin;
+/*
+ * "fwait" is nasty: few of them will be included in other X87 instructions
+ * ("fclex", "finit", "fstcw", "fstsw", "fsave" have two names, other
+ * instructions are unchanged) - but if after them we see regular instruction
+ * then we must print them all. This convoluted logic is not needed when we
+ * don't print anything so decoder does not include it.
+ */
+static Bool ProcessFWait(const uint8_t **begin, const uint8_t *end,
Dmitry Polukhin 2012/11/02 17:31:06 Too many arguments, class with state should solve
+ struct Instruction *instruction,
+ struct DecodeState *userdata,
+ unsigned char *rex_prefix, Bool *spurious_rex_prefix,
+ enum OperandName *rm_base,
+ enum OperandName *rm_index) {
+ /* Instruction is "fwait" if it's 0x9b or if it's REX prefix plus 0x9b. */
+ if (((end == *begin + 1) && ((*begin)[0] == 0x9b)) ||
+ ((end == *begin + 2) &&
+ (((*begin)[0] & 0xf0) == 0x40) && ((*begin)[1] == 0x9b))) {
+ if (!(userdata->fwait)) {
+ userdata->fwait = *begin;
}
- return;
- } else if (((struct DecodeState *)userdata)->fwait) {
- /* If it's x87 instruction then we can fold some fwait's in the instruction
- itself. */
- if (((begin[0] >= 0xd8) && (begin[0] <= 0xdf)) ||
- ((((begin[0] & 0xf0) == 0x40) || (begin[0] == 0x66)) &&
- (begin[1] >= 0xd8) && (begin[1] <= 0xdf)) ||
- ((begin[0] == 0x66) || ((begin[1] & 0xf0) == 0x40) ||
- (begin[2] >= 0xd8) || (begin[2] <= 0xdf))) {
- /* fwait "prefix" can only include two 0x9b bytes or one rex byte - and
- * then only if the instruction itself have no rex prefix. */
- int fwait_count = !!data16_prefix;
- int rex_count = (!!rex_prefix) | (!!data16_prefix);
+ return TRUE;
+ } else if (userdata->fwait) {
+ /*
+ * If it's x87 instruction then we can fold some fwait's in the instruction
+ * itself.
+ *
+ * Instruction is x87 instruction if it has opcode from 0xd8 to 0xdf, but it
+ * can include either 0x66 prefix or REX prefix, or both.
+ */
+ if ((((*begin)[0] & 0xf8) == 0xd8) ||
+ (((((*begin)[0] & 0xf0) == 0x40) || ((*begin)[0] == 0x66)) &&
+ (((*begin)[1] & 0xf8) == 0xd8)) ||
+ (((*begin)[0] == 0x66) && (((*begin)[1] & 0xf0) == 0x40) &&
+ (((*begin)[2] & 0xf8) == 0xd8))) {
+ /*
+ * fwait "prefix" can only include two 0x9b bytes or one rex byte - and
+ * then only if the instruction itself has no rex prefix.
+ */
+ int fwait_count = !!(instruction->prefix.data16);
+ int rex_count = (!!*rex_prefix) | (!!(instruction->prefix.data16));
for (;;) {
- if (begin == ((struct DecodeState *)userdata)->fwait)
+ if (*begin == userdata->fwait)
break;
- if ((begin[-1]) == 0x9b) {
+ if (((*begin)[-1]) == 0x9b) {
if (fwait_count < 2) {
- --begin;
+ --*begin;
++fwait_count;
- if ((begin[1] & 0xf0) == 0x40)
+ if (((*begin)[1] & 0xf0) == 0x40)
break;
} else {
break;
}
- } else if ((begin[-1] & 0xf0) == 0x40) {
+ } else if (((*begin)[-1] & 0xf0) == 0x40) {
if (rex_count >= 1)
break;
- --begin;
+ --*begin;
++rex_count;
- if (!rex_prefix) {
- rex_prefix = *begin;
- /* Bug-to-bug compatibility, fun... */
- if ((rex_prefix & 0x01) && (rm_base <= REG_RDI)) {
- if (operands_count == 1 &&
+ if (!*rex_prefix) {
+ *rex_prefix = **begin;
+ /*
+ * Bug-to-bug compatibility: objdump will erroneously use bits from
+ * first REX prefix (attached to "fwait") and not from the
+ * second REX prefix (attached to the instruction itself).
+ *
+ * Duplicate this error here until it is fixed in objdump.
+ */
+ if ((*rex_prefix & 0x01) && (*rm_base <= REG_RDI)) {
+ if (instruction->operands_count == 1 &&
instruction->operands[0].name == REG_RM)
- rm_base |= REG_R8;
+ *rm_base |= REG_R8;
else
- spurious_rex_prefix = TRUE;
+ *spurious_rex_prefix = TRUE;
}
- if (rex_prefix & 0x02) {
- if (operands_count == 1 &&
+ if (*rex_prefix & 0x02) {
+ if (instruction->operands_count == 1 &&
instruction->operands[0].name == REG_RM) {
- if (rm_index <= REG_RDI)
- rm_index |= REG_R8;
- else if (rm_index == REG_RIZ)
- rm_index = REG_R12;
- else if (rm_index == NO_REG)
- spurious_rex_prefix = TRUE;
+ if (*rm_index <= REG_RDI)
+ *rm_index |= REG_R8;
+ else if (*rm_index == REG_RIZ)
+ *rm_index = REG_R12;
+ else if (*rm_index == NO_REG)
+ *spurious_rex_prefix = TRUE;
} else {
- spurious_rex_prefix = TRUE;
+ *spurious_rex_prefix = TRUE;
}
}
- if (rex_prefix & 0x0c)
- spurious_rex_prefix = TRUE;
+ if (*rex_prefix & 0x0c)
+ *spurious_rex_prefix = TRUE;
}
}
}
- if (begin != ((struct DecodeState *)userdata)->fwait) {
- while ((((struct DecodeState *)userdata)->fwait) < begin) {
+ if (*begin != userdata->fwait) {
+ while (userdata->fwait < *begin) {
printf("%*lx:\t%02x \tfwait\n",
- ((struct DecodeState *)userdata)->width,
- (long)((((struct DecodeState *)userdata)->fwait) -
- (((struct DecodeState *)userdata)->offset)),
- *((struct DecodeState *)userdata)->fwait);
- ++(((struct DecodeState *)userdata)->fwait);
+ userdata->width, (long)(userdata->fwait - userdata->offset),
+ *userdata->fwait);
+ ++(userdata->fwait);
}
}
} else {
- while ((((struct DecodeState *)userdata)->fwait) < begin) {
+ while ((userdata->fwait) < *begin) {
printf("%*lx:\t%02x \tfwait\n",
- ((struct DecodeState *)userdata)->width,
- (long)((((struct DecodeState *)userdata)->fwait) -
- (((struct DecodeState *)userdata)->offset)),
- *((struct DecodeState *)userdata)->fwait);
- ++(((struct DecodeState *)userdata)->fwait);
+ userdata->width, (long)(userdata->fwait - userdata->offset),
+ *userdata->fwait);
+ ++(userdata->fwait);
}
}
- ((struct DecodeState *)userdata)->fwait = FALSE;
+ userdata->fwait = FALSE;
}
+ return FALSE;
+}
+void ProcessInstruction(const uint8_t *begin, const uint8_t *end,
Dmitry Polukhin 2012/11/02 17:31:06 I think it should be converted to so class Instruc
+ struct Instruction *instruction, void *callback_data) {
+ struct DecodeState *userdata = callback_data;
+ const char *instruction_name = instruction->name;
+ unsigned char operands_count = instruction->operands_count;
+ unsigned char rex_prefix = instruction->prefix.rex;
+ enum OperandName rm_index = instruction->rm.index;
+ enum OperandName rm_base = instruction->rm.base;
+ Bool data16_prefix = instruction->prefix.data16;
+ const uint8_t *p;
+ char delimeter = ' ';
+ Bool print_rip = FALSE;
+ Bool empty_rex_prefix_ok = FALSE;
+ Bool spurious_rex_prefix = FALSE;
+#define print_name(x) (printf((x)), shown_name += strlen((x)))
Brad Chen 2012/10/22 21:29:05 What is your goal in making this a macro rather th
+ size_t shown_name = 0;
+ int i, operand_type;
+
+ if (ProcessFWait(&begin, end, instruction, callback_data,
+ &rex_prefix, &spurious_rex_prefix, &rm_base, &rm_index))
+ return;
+
+ /*
+ * Objdump will print data16 (0x66) prefix on a separate line for "fbld",
+ * "fbstp", "fild", "fistp", "fld", and "fstp" instructions.
+ */
if ((data16_prefix) && (begin[0] == 0x66) && (!(rex_prefix & 0x08)) &&
(IsNameInList(instruction_name,
"fbld", "fbstp", "fild", "fistp", "fld", "fstp", NULL))) {
printf("%*lx:\t66 \tdata16\n",
- ((struct DecodeState *)userdata)->width,
- (long)(begin - (((struct DecodeState *)userdata)->offset)));
+ userdata->width, (long)(begin - userdata->offset));
data16_prefix = FALSE;
++begin;
}
- printf("%*lx:\t", ((struct DecodeState *)userdata)->width,
- (long)(begin - (((struct DecodeState *)userdata)->offset)));
+
+ /* Start the main processing part: print offset here. */
+ printf("%*lx:\t", userdata->width, (long)(begin - userdata->offset));
for (p = begin; p < begin + 7; ++p) {
if (p >= end)
printf(" ");
@@ -279,6 +306,7 @@
printf("%02x ", *p);
}
printf("\t");
+
/*
* "pclmulqdq" has two-operand mnemonic names for "imm8" equal to 0x00, 0x01,
* 0x10, and 0x11. Objdump incorrectly mixes them up with 0x2 and 0x03.
@@ -292,6 +320,7 @@
--operands_count;
}
}
+
/*
* "vpclmulqdq" has two-operand mnemonic names for "imm8" equal to 0x00, 0x01,
* 0x10, and 0x11. Objdump mixes them with 0x2 and 0x03.
@@ -305,14 +334,20 @@
--operands_count;
}
}
+
spurious_rex_prefix |=
rex_prefix &&
(instruction->prefix.rex_b_spurious ||
instruction->prefix.rex_x_spurious ||
instruction->prefix.rex_r_spurious ||
instruction->prefix.rex_w_spurious);
+
+ /*
+ * Objdump sometimes does not show spurious rex prefixes. Adjust the
+ * spurious_rex_prefix variable here for these cases.
+ */
if (operands_count > 0) {
- if (!((struct DecodeState *)userdata)->ia32_mode)
+ if (!userdata->ia32_mode)
for (i=0; i<operands_count; ++i)
/*
* Objdump mistakenly allows "lock" with "mov %crX,%rXX" only in ia32
@@ -333,13 +368,7 @@
}
}
}
- /* Only few rare instructions show spurious REX.B in objdump. */
- if (!spurious_rex_prefix && instruction->prefix.rex_b_spurious)
- if (IsNameInList(instruction_name,
- "ja", "jae", "jbe", "jb", "je", "jg", "jge", "jle",
- "jl", "jne", "jno", "jnp", "jns", "jo", "jp", "js",
- "jecxz", "jrcxz", "loop", "loope", "loopne", NULL))
- spurious_rex_prefix = TRUE;
+
/* Some instructions don't show spurious REX.B in objdump. */
if (spurious_rex_prefix &&
instruction->prefix.rex_b_spurious &&
@@ -354,6 +383,7 @@
break;
}
}
+
/* Some instructions don't show spurious REX.W in objdump. */
if (spurious_rex_prefix &&
!instruction->prefix.rex_b_spurious &&
@@ -394,6 +424,12 @@
"popf", "push", "pushf", NULL)))
spurious_rex_prefix = TRUE;
+ /*
+ * Print prefixes. For the case where two prefixes are present we must print
+ * them in a correct order. First print prefixes from begin[0], then handle
+ * prefixes from begin[1] (at this point we don't support more than two
+ * prefixes).
+ */
if (instruction->prefix.lock && (begin[0] == 0xf0))
print_name("lock ");
if (instruction->prefix.repnz && (begin[0] == 0xf2))
@@ -409,7 +445,6 @@
else
print_name("repz ");
}
-
if (((data16_prefix) && (rex_prefix & 0x08)) &&
!IsNameInList(instruction_name,
"bsf", "bsr", "fldenvs", "fnstenvs", "fnsaves", "frstors",
@@ -418,7 +453,6 @@
(begin[0] != 0x66) || ((begin[1] & 0x48) != 0x48) || (begin[2] != 0x90))
print_name("data32 ");
}
-
if (instruction->prefix.lock && (begin[0] != 0xf0)) {
print_name("lock ");
}
@@ -437,6 +471,11 @@
print_name("repz ");
}
+ /*
+ * REX prefix. Empty REX prefix (without REX.B, REX.X, REX.R, or REX.W bits)
+ * is not always spurious: it can be used to select between 8-bit registers
+ * (%ah vs %spl, %ch vs %bpl, %dh vs %dil, and %bh vs %sil).
+ */
if (rex_prefix == 0x40) {
if (operands_count > 0)
for (i=0; i<operands_count; ++i)
@@ -449,6 +488,9 @@
}
if (!empty_rex_prefix_ok)
print_name("rex ");
+ /*
+ * Non-empty REX prefix is shown if and only if it's spurious.
Brad Chen 2012/10/22 21:29:05 if if
+ */
} else if (spurious_rex_prefix) {
print_name("rex.");
if (rex_prefix & 0x08) {
@@ -466,10 +508,13 @@
print_name(" ");
}
+ /* Prefixes are printed. Print the instruction name. */
printf("%s", instruction_name);
shown_name += strlen(instruction_name);
+ /* In some cases AT&T instruction uses suffix to show the size of operand. */
Brad Chen 2012/10/22 21:29:05 It looks to me like it may be the case that the st
if (instruction->att_instruction_suffix) {
+ /* But special versions of "nop" never use these suffixes. */
if (!IsNameInList(instruction_name,
"nopw 0x0(%eax,%eax,1)",
"nopw 0x0(%rax,%rax,1)",
@@ -493,11 +538,17 @@
print_name("q");
}
}
+
+ /* Regular "mov" with 64-bit immediate is printed as "movabs" by objdump. */
if (strcmp(instruction_name, "mov") == 0 &&
instruction->operands[1].name == REG_IMM &&
instruction->operands[1].type == OPERAND_TYPE_64_BIT)
print_name("abs");
+ /*
+ * Jump instructions can use branch-prediction prefixes. They are shown as
+ * suffixes by objdump.
+ */
if (IsNameInList(instruction_name,
"ja", "jae", "jbe", "jb", "je", "jg", "jge", "jle",
"jl", "jne", "jno", "jnp", "jns", "jo", "jp", "js",
@@ -507,8 +558,9 @@
else if (instruction->prefix.branch_taken)
print_name(",pt");
}
+#undef print_name
-#undef print_name
+ /* Objdump does not print spaces after some "special" instructions. */
if ((strcmp(instruction_name, "nop") != 0 || operands_count != 0) &&
!IsNameInList(
instruction_name,
@@ -530,6 +582,10 @@
"data32 data32 data32 data32 data32 nopw %cs:0x0(%eax,%eax,1)",
"data32 data32 data32 data32 data32 nopw %cs:0x0(%rax,%rax,1)",
NULL)) {
+ /*
+ * If this is a "regular" instruction with a short name then its operands are
+ * aligned.
+ */
while (shown_name < 6) {
printf(" ");
++shown_name;
@@ -537,8 +593,11 @@
if (operands_count == 0)
printf(" ");
}
+
+ /* Print instruction operands. */
for (i=operands_count-1; i>=0; --i) {
printf("%c", delimeter);
+ /* If the instruction is "call" or "jmp" then we need to print an asterisk. */
if (IsNameInList(instruction_name, "call", "jmp", "lcall", "ljmp", NULL) &&
instruction->operands[i].name != JMP_TO)
printf("*");
@@ -558,6 +617,8 @@
} else {
operand_type = instruction->operands[i].type;
}
+
+ /* All the special cases are handled, let's print the normal operands! */
switch (instruction->operands[i].name) {
case REG_RAX:
case REG_RCX:
@@ -590,7 +651,7 @@
else
printf("-0x%"NACL_PRIx64, -instruction->rm.offset);
}
- if (((struct DecodeState *)userdata)->ia32_mode) {
+ if (userdata->ia32_mode) {
if ((rm_base != NO_REG) ||
(rm_index != NO_REG) ||
(instruction->rm.scale != 0))
@@ -649,19 +710,19 @@
printf("(%%dx)");
break;
case REG_DS_RBX:
- if (((struct DecodeState *)userdata)->ia32_mode)
+ if (userdata->ia32_mode)
printf("%%ds:(%%ebx)");
else
printf("%%ds:(%%rbx)");
break;
case REG_ES_RDI:
- if (((struct DecodeState *)userdata)->ia32_mode)
+ if (userdata->ia32_mode)
printf("%%es:(%%edi)");
else
printf("%%es:(%%rdi)");
break;
case REG_DS_RSI:
- if (((struct DecodeState *)userdata)->ia32_mode)
+ if (userdata->ia32_mode)
printf("%%ds:(%%esi)");
else
printf("%%ds:(%%rsi)");
@@ -669,10 +730,10 @@
case JMP_TO:
if (instruction->operands[0].type == OPERAND_TYPE_16_BIT)
printf("0x%lx", (long)((end + instruction->rm.offset -
- (((struct DecodeState *)userdata)->offset)) & 0xffff));
+ (userdata->offset)) & 0xffff));
else
printf("0x%lx", (long)(end + instruction->rm.offset -
- (((struct DecodeState *)userdata)->offset)));
+ (userdata->offset)));
break;
case REG_RIP:
case REG_RIZ:
@@ -681,16 +742,23 @@
}
delimeter = ',';
}
+
+ /*
+ * If %rip was used then objdump will show the actual address as the comment.
+ */
if (print_rip) {
printf(" # 0x%8"NACL_PRIx64,
(uint64_t) (end + instruction->rm.offset -
- (((struct DecodeState *)userdata)->offset)));
+ (userdata->offset)));
}
+
+ /* First line of instruction decoding is printed. Finish the line. */
printf("\n");
begin += 7;
+ /* If there are more than seven bytes we need to print the rest. */
while (begin < end) {
- printf("%*"NACL_PRIx64":\t", ((struct DecodeState *)userdata)->width,
- (uint64_t) (begin - (((struct DecodeState *)userdata)->offset)));
+ printf("%*"NACL_PRIx64":\t", userdata->width,
+ (uint64_t) (begin - (userdata->offset)));
for (p = begin; p < begin + 7; ++p) {
if (p >= end) {
printf("\n");

Powered by Google App Engine
This is Rietveld 408576698