| Index: src/trusted/validator_ragel/validator_x86_64.rl
|
| ===================================================================
|
| --- src/trusted/validator_ragel/validator_x86_64.rl (revision 10976)
|
| +++ src/trusted/validator_ragel/validator_x86_64.rl (working copy)
|
| @@ -20,7 +20,7 @@
|
| #include <string.h>
|
|
|
| #include "native_client/src/trusted/validator_ragel/bitmap.h"
|
| -#include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h"
|
| +#include "native_client/src/trusted/validator_ragel/validator_internal.h"
|
|
|
| %%{
|
| machine x86_64_validator;
|
| @@ -64,18 +64,14 @@
|
| "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
|
|
| action check_access {
|
| - CheckAccess(instruction_begin - data, base, index, restricted_register,
|
| - valid_targets, &instruction_info_collected);
|
| + CheckAccess(instruction_begin - codeblock,
|
| + base,
|
| + index,
|
| + restricted_register,
|
| + valid_targets,
|
| + &instruction_info_collected);
|
| }
|
|
|
| - # Action which marks last byte as not immediate. Most 3DNow! instructions,
|
| - # some AVX and XOP instructions have this proerty. It's referenced by
|
| - # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl"
|
| - # file.
|
| - action last_byte_is_not_immediate {
|
| - instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE;
|
| - }
|
| -
|
| action modifiable_instruction {
|
| instruction_info_collected |= MODIFIABLE_INSTRUCTION;
|
| }
|
| @@ -136,7 +132,7 @@
|
| # But since these instructions are "second half" of the %rbp sandboxing they
|
| # can be used *only* when %rbp is restricted.
|
| #
|
| - # That is (normal instruction):
|
| + # Compare:
|
| # mov %eax,%ebp
|
| # mov %esi,%edi <- Error: %ebp is restricted
|
| # vs
|
| @@ -149,11 +145,13 @@
|
| # Check this precondition and mark the beginning of the instruction as
|
| # invalid jump for target.
|
| @{ if (restricted_register == REG_RBP)
|
| + /* RESTRICTED_REGISTER_USED is an informational flag used in tests. */
|
| instruction_info_collected |= RESTRICTED_REGISTER_USED;
|
| else
|
| + /* UNRESTRICTED_RBP_PROCESSED is an error flag used in production. */
|
| instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;
|
| restricted_register = NO_REG;
|
| - UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
|
| + UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets);
|
| };
|
|
|
| # Special %rsp modifications - the ones which don't need a sandboxing.
|
| @@ -211,7 +209,7 @@
|
| else
|
| instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;
|
| restricted_register = NO_REG;
|
| - UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
|
| + UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets);
|
| };
|
|
|
| # naclcall or nacljmp. These are three-instruction indirection-jump sequences.
|
| @@ -219,7 +217,7 @@
|
| # and RBASE, %rXX
|
| # jmpq *%rXX (or: callq *%rXX)
|
| # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not
|
| - # just as part of the naclcall/nacljmp, but also as a standolene instruction).
|
| + # just as part of the naclcall/nacljmp, but also as a standalone instruction).
|
| #
|
| # This means that when naclcall_or_nacljmp ragel machine will be combined with
|
| # "normal_instruction*" regular action process_1_operand_zero_extends will be
|
| @@ -239,7 +237,7 @@
|
| # byte for the dst while last one uses field RM of the ModR/M byte for the src
|
| # and field REG of the ModR/M byte for dst. Both should be allowed.
|
| #
|
| - # See AMD/Intel manual for clarification "add" instruction encoding.
|
| + # See AMD/Intel manual for clarification about "add" instruction encoding.
|
| #
|
| # REGISTER USAGE ABBREVIATIONS:
|
| # E86: legacy ia32 registers (all eight: %eax to %edi)
|
| @@ -266,8 +264,10 @@
|
| (REX_WRX? 0xff b_11_100_xxx)))
|
| @{
|
| ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected,
|
| - &instruction_begin, current_position,
|
| - data, valid_targets);
|
| + &instruction_begin,
|
| + current_position,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| # This block encodes call and jump "superinstruction" of the following form:
|
| @@ -288,8 +288,10 @@
|
| (REX_WRX? 0xff b_11_100_xxx)))
|
| @{
|
| ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected,
|
| - &instruction_begin, current_position,
|
| - data, valid_targets);
|
| + &instruction_begin,
|
| + current_position,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| # This block encodes call and jump "superinstruction" of the following form:
|
| @@ -327,8 +329,10 @@
|
| (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
|
| @{
|
| ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected,
|
| - &instruction_begin, current_position,
|
| - data, valid_targets);
|
| + &instruction_begin,
|
| + current_position,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| # This block encodes call and jump "superinstruction" of the following form:
|
| @@ -366,8 +370,10 @@
|
| (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
|
| @{
|
| ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected,
|
| - &instruction_begin, current_position,
|
| - data, valid_targets);
|
| + &instruction_begin,
|
| + current_position,
|
| + codeblock,
|
| + valid_targets);
|
| };
|
|
|
| # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rsi) operand
|
| @@ -434,7 +440,10 @@
|
| string_instruction_rsi_no_rdi
|
| @{
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
|
| + 2 /* mov */ + 4 /* lea */,
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| @@ -442,7 +451,10 @@
|
| string_instruction_rsi_no_rdi
|
| @{
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
|
| + 3 /* mov */ + 4 /* lea */,
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| };
|
|
|
| # "Superinstruction" which includes %rdi sandboxing.
|
| @@ -460,7 +472,10 @@
|
| (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
|
| @{
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
|
| + 2 /* mov */ + 4 /* lea */,
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
|
| @@ -468,7 +483,10 @@
|
| (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
|
| @{
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
|
| + 3 /* mov */ + 4 /* lea */,
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| };
|
|
|
|
|
| @@ -491,7 +509,9 @@
|
| @{
|
| ExpandSuperinstructionBySandboxingBytes(
|
| 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */,
|
| - &instruction_begin, data, valid_targets);
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| (((0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| @@ -508,7 +528,9 @@
|
| ExpandSuperinstructionBySandboxingBytes(
|
| 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */
|
| /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */,
|
| - &instruction_begin, data, valid_targets);
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
|
| @@ -519,7 +541,9 @@
|
| @{
|
| ExpandSuperinstructionBySandboxingBytes(
|
| 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */,
|
| - &instruction_begin, data, valid_targets);
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| };
|
|
|
| # All the "special" instructions (== instructions which obey non-standard
|
| @@ -549,21 +573,21 @@
|
| # Remove special instructions which are only allowed in special cases.
|
| normal_instruction = one_instruction - special_instruction;
|
|
|
| - # Check if call is properly aligned.
|
| - #
|
| - # For direct call we explicitly encode all variations. For indirect call
|
| - # we accept all the special instructions which ends with register-addressed
|
| - # indirect call.
|
| + # For direct call we explicitly encode all variations.
|
| + direct_call = (data16 REX_RXB? 0xe8 rel16) |
|
| + (REX_WRXB? 0xe8 rel32) |
|
| + (data16 REXW_RXB 0xe8 rel32);
|
| +
|
| + # For indirect call we accept only near register-addressed indirect call.
|
| + indirect_call_register = data16? REX_WRXB? 0xff (opcode_2 & modrm_registers);
|
| +
|
| + # Ragel machine that accepts one call instruction or call superinstruction and
|
| + # checks if call is properly aligned.
|
| call_alignment =
|
| - ((normal_instruction &
|
| - # Direct call
|
| - ((data16 REX_RXB? 0xe8 rel16) |
|
| - (REX_WRXB? 0xe8 rel32) |
|
| - (data16 REXW_RXB 0xe8 rel32))) |
|
| - (special_instruction &
|
| - # Indirect call
|
| - (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* &
|
| - modrm_registers))))
|
| + ((normal_instruction & direct_call)
|
| + # For indirect calls we accept all the special instructions which end with
| 
| + # a register-addressed indirect call.
|
| # Call instruction must aligned to the end of bundle. Previously this was
|
| # strict requirement, today it's just warning to aid with debugging.
|
| @{
|
| @@ -580,6 +604,15 @@
|
| # After that we move instruction_begin and clean all the variables which
|
| # only used in the processing of a single instruction (prefixes, operand
|
| # states and instruction_info_collected).
|
| + # This action calls users callback (if needed) and cleans up validators
|
| + # internal state.
|
| + #
|
| + # We call the user callback either on validation errors or on every
|
| + # instruction, depending on the CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option.
|
| + #
|
| + # After that we move instruction_begin and clean all the variables which
|
| + # are only used in the processing of a single instruction (prefixes, operand
|
| + # states and instruction_info_collected).
|
| action end_of_instruction_cleanup {
|
| /* Call user-supplied callback. */
|
| instruction_end = current_position + 1;
|
| @@ -601,7 +634,7 @@
|
| * Note: we mark start of the next instruction here, not start of the
|
| * current one because memory access check should be able to clear this
|
| * bit when restricted register is used. */
|
| - MarkValidJumpTarget(instruction_begin - data, valid_targets);
|
| + MarkValidJumpTarget(instruction_begin - codeblock, valid_targets);
|
|
|
| /* Clear variables. */
|
| instruction_info_collected = 0;
|
| @@ -630,7 +663,7 @@
|
| }
|
|
|
| # This is main ragel machine: it does 99% of validation work. There are only
|
| - # one thing to do with bundle if this machine accepts the bundle:
|
| + # one thing to do with bundle if this ragel machine accepts the bundle:
|
| # * check for the state of the restricted_register at the end of the bundle.
|
| # It's an error is %rbp or %rsp is restricted at the end of the bundle.
|
| # Additionally if all the bundles are fine you need to check that direct jumps
|
| @@ -645,6 +678,10 @@
|
|
|
| }%%
|
|
|
| +/*
|
| + * The "write data" statement causes Ragel to emit the constant static data
|
| + * needed by the ragel machine.
|
| + */
|
| %% write data;
|
|
|
| enum OperandKind {
|
| @@ -853,7 +890,7 @@
|
| static INLINE void ExpandSuperinstructionBySandboxingBytes(
|
| size_t sandbox_instructions_size,
|
| const uint8_t **instruction_begin,
|
| - const uint8_t *data,
|
| + const uint8_t codeblock[],
|
| bitmap_word *valid_targets) {
|
| *instruction_begin -= sandbox_instructions_size;
|
| /*
|
| @@ -861,7 +898,7 @@
|
| * don't need to mark the beginning of the whole "superinstruction" - that's
|
| * why we move start by one byte and don't change the length.
|
| */
|
| - UnmarkValidJumpTargets((*instruction_begin + 1 - data),
|
| + UnmarkValidJumpTargets((*instruction_begin + 1 - codeblock),
|
| sandbox_instructions_size,
|
| valid_targets);
|
| }
|
| @@ -991,11 +1028,14 @@
|
| uint32_t *instruction_info_collected,
|
| const uint8_t **instruction_begin,
|
| const uint8_t *current_position,
|
| - const uint8_t *data,
|
| + const uint8_t codeblock[],
|
| bitmap_word *valid_targets) {
|
| if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
|
| + 3 /* and */ + 3 /* add */,
|
| + instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| else
|
| *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| }
|
| @@ -1028,11 +1068,14 @@
|
| uint32_t *instruction_info_collected,
|
| const uint8_t **instruction_begin,
|
| const uint8_t *current_position,
|
| - const uint8_t *data,
|
| + const uint8_t codeblock[],
|
| bitmap_word *valid_targets) {
|
| if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
|
| + 3 /* and */ + 3 /* add */,
|
| + instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| else
|
| *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| }
|
| @@ -1071,11 +1114,14 @@
|
| uint32_t *instruction_info_collected,
|
| const uint8_t **instruction_begin,
|
| const uint8_t *current_position,
|
| - const uint8_t *data,
|
| + const uint8_t codeblock[],
|
| bitmap_word *valid_targets) {
|
| if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
|
| + 4 /* and */ + 3 /* add */,
|
| + instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| else
|
| *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| }
|
| @@ -1114,17 +1160,21 @@
|
| uint32_t *instruction_info_collected,
|
| const uint8_t **instruction_begin,
|
| const uint8_t *current_position,
|
| - const uint8_t *data,
|
| + const uint8_t codeblock[],
|
| bitmap_word *valid_targets) {
|
| if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
|
| + 4 /* and */ + 3 /* add */,
|
| + instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| else
|
| *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| }
|
|
|
|
|
| -Bool ValidateChunkAMD64(const uint8_t *data, size_t size,
|
| +Bool ValidateChunkAMD64(const uint8_t codeblock[],
|
| + size_t size,
|
| uint32_t options,
|
| const NaClCPUFeaturesX86 *cpu_features,
|
| ValidationCallbackFunc user_callback,
|
| @@ -1168,21 +1218,21 @@
|
| /*
|
| * This option is usually used in tests: we will process the whole chunk
|
| * in one pass. Usually each bundle is processed separately which means
|
| - * instructions (and super-instructions) can not cross borders of the bundle.
|
| + * instructions (and "superinstructions") can not cross borders of the bundle.
|
| */
|
| if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)
|
| - end_of_bundle = data + size;
|
| + end_of_bundle = codeblock + size;
|
| else
|
| - end_of_bundle = data + kBundleSize;
|
| + end_of_bundle = codeblock + kBundleSize;
|
|
|
| /*
|
| - * Main loop. Here we process the data array bundle-after-bundle.
|
| + * Main loop. Here we process the codeblock array bundle-after-bundle.
|
| * Ragel-produced DFA does all the checks with one exception: direct jumps.
|
| * It collects the two arrays: valid_targets and jump_dests which are used
|
| * to test direct jumps later.
|
| */
|
| - for (current_position = data;
|
| - current_position < data + size;
|
| + for (current_position = codeblock;
|
| + current_position < codeblock + size;
|
| current_position = end_of_bundle,
|
| end_of_bundle = current_position + kBundleSize) {
|
| /* Start of the instruction being processed. */
|
| @@ -1204,7 +1254,15 @@
|
| uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B;
|
| uint8_t vex_prefix3 = 0x00;
|
|
|
| + /*
|
| + * The "write init" statement causes Ragel to emit initialization code.
|
| + * This should be executed once before the ragel machine is started.
|
| + */
|
| %% write init;
|
| + /*
|
| + * The "write exec" statement causes Ragel to emit the ragel machine's
|
| + * execution code.
|
| + */
|
| %% write exec;
|
|
|
| /*
|
| @@ -1227,8 +1285,12 @@
|
| * Check the direct jumps. All the targets from jump_dests must be in
|
| * valid_targets.
|
| */
|
| - result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests,
|
| - user_callback, callback_data);
|
| + result &= ProcessInvalidJumpTargets(codeblock,
|
| + size,
|
| + valid_targets,
|
| + jump_dests,
|
| + user_callback,
|
| + callback_data);
|
|
|
| /* We only use malloc for a large code sequences */
|
| if (jump_dests != &jump_dests_small) free(jump_dests);
|
|
|