| Index: src/trusted/validator_ragel/validator_x86_64.rl
|
| ===================================================================
|
| --- src/trusted/validator_ragel/validator_x86_64.rl (revision 10976)
|
| +++ src/trusted/validator_ragel/validator_x86_64.rl (working copy)
|
| @@ -20,7 +20,7 @@
|
| #include <string.h>
|
|
|
| #include "native_client/src/trusted/validator_ragel/bitmap.h"
|
| -#include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h"
|
| +#include "native_client/src/trusted/validator_ragel/validator_internal.h"
|
|
|
| %%{
|
| machine x86_64_validator;
|
| @@ -64,18 +64,14 @@
|
| "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
|
|
| action check_access {
|
| - CheckAccess(instruction_begin - data, base, index, restricted_register,
|
| - valid_targets, &instruction_info_collected);
|
| + CheckAccess(instruction_begin - codeblock,
|
| + base,
|
| + index,
|
| + restricted_register,
|
| + valid_targets,
|
| + &instruction_info_collected);
|
| }
|
|
|
| - # Action which marks last byte as not immediate. Most 3DNow! instructions,
|
| - # some AVX and XOP instructions have this proerty. It's referenced by
|
| - # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl"
|
| - # file.
|
| - action last_byte_is_not_immediate {
|
| - instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE;
|
| - }
|
| -
|
| action modifiable_instruction {
|
| instruction_info_collected |= MODIFIABLE_INSTRUCTION;
|
| }
|
| @@ -136,7 +132,7 @@
|
| # But since these instructions are "second half" of the %rbp sandboxing they
|
| # can be used *only* when %rbp is restricted.
|
| #
|
| - # That is (normal instruction):
|
| + # Compare:
|
| # mov %eax,%ebp
|
| # mov %esi,%edi <- Error: %ebp is restricted
|
| # vs
|
| @@ -149,11 +145,13 @@
|
| # Check this precondition and mark the beginning of the instruction as
|
| # invalid jump for target.
|
| @{ if (restricted_register == REG_RBP)
|
| + /* RESTRICTED_REGISTER_USED is an informational flag used in tests. */
|
| instruction_info_collected |= RESTRICTED_REGISTER_USED;
|
| else
|
| + /* UNRESTRICTED_RBP_PROCESSED is an error flag used in production. */
|
| instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;
|
| restricted_register = NO_REG;
|
| - UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
|
| + UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets);
|
| };
|
|
|
| # Special %rsp modifications - the ones which don't need a sandboxing.
|
| @@ -211,7 +209,7 @@
|
| else
|
| instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;
|
| restricted_register = NO_REG;
|
| - UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
|
| + UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets);
|
| };
|
|
|
| # naclcall or nacljmp. These are three-instruction indirection-jump sequences.
|
| @@ -219,7 +217,7 @@
|
| # and RBASE, %rXX
|
| # jmpq *%rXX (or: callq *%rXX)
|
| # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not
|
| - # just as part of the naclcall/nacljmp, but also as a standolene instruction).
|
| + # just as part of the naclcall/nacljmp, but also as a standalone instruction).
|
| #
|
| # This means that when naclcall_or_nacljmp ragel machine will be combined with
|
| # "normal_instruction*" regular action process_1_operand_zero_extends will be
|
| @@ -239,7 +237,7 @@
|
| # byte for the dst while last one uses field RM of the ModR/M byte for the src
|
| # and field REG of the ModR/M byte for dst. Both should be allowed.
|
| #
|
| - # See AMD/Intel manual for clarification "add" instruction encoding.
|
| + # See AMD/Intel manual for clarification about "add" instruction encoding.
|
| #
|
| # REGISTER USAGE ABBREVIATIONS:
|
| # E86: legacy ia32 registers (all eight: %eax to %edi)
|
| @@ -266,8 +264,10 @@
|
| (REX_WRX? 0xff b_11_100_xxx)))
|
| @{
|
| ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected,
|
| - &instruction_begin, current_position,
|
| - data, valid_targets);
|
| + &instruction_begin,
|
| + current_position,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| # This block encodes call and jump "superinstruction" of the following form:
|
| @@ -288,8 +288,10 @@
|
| (REX_WRX? 0xff b_11_100_xxx)))
|
| @{
|
| ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected,
|
| - &instruction_begin, current_position,
|
| - data, valid_targets);
|
| + &instruction_begin,
|
| + current_position,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| # This block encodes call and jump "superinstruction" of the following form:
|
| @@ -327,8 +329,10 @@
|
| (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
|
| @{
|
| ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected,
|
| - &instruction_begin, current_position,
|
| - data, valid_targets);
|
| + &instruction_begin,
|
| + current_position,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| # This block encodes call and jump "superinstruction" of the following form:
|
| @@ -366,8 +370,10 @@
|
| (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
|
| @{
|
| ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected,
|
| - &instruction_begin, current_position,
|
| - data, valid_targets);
|
| + &instruction_begin,
|
| + current_position,
|
| + codeblock,
|
| + valid_targets);
|
| };
|
|
|
| # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rsi) operand
|
| @@ -434,7 +440,10 @@
|
| string_instruction_rsi_no_rdi
|
| @{
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
|
| + 2 /* mov */ + 4 /* lea */,
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| @@ -442,7 +451,10 @@
|
| string_instruction_rsi_no_rdi
|
| @{
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
|
| + 3 /* mov */ + 4 /* lea */,
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| };
|
|
|
| # "Superinstruction" which includes %rdi sandboxing.
|
| @@ -460,7 +472,10 @@
|
| (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
|
| @{
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
|
| + 2 /* mov */ + 4 /* lea */,
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
|
| @@ -468,7 +483,10 @@
|
| (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
|
| @{
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
|
| + 3 /* mov */ + 4 /* lea */,
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| };
|
|
|
|
|
| @@ -491,7 +509,9 @@
|
| @{
|
| ExpandSuperinstructionBySandboxingBytes(
|
| 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */,
|
| - &instruction_begin, data, valid_targets);
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| (((0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| @@ -508,7 +528,9 @@
|
| ExpandSuperinstructionBySandboxingBytes(
|
| 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */
|
| /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */,
|
| - &instruction_begin, data, valid_targets);
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| } |
|
|
|
| REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
|
| @@ -519,7 +541,9 @@
|
| @{
|
| ExpandSuperinstructionBySandboxingBytes(
|
| 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */,
|
| - &instruction_begin, data, valid_targets);
|
| + &instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| };
|
|
|
| # All the "special" instructions (== instructions which obey non-standard
|
| @@ -549,21 +573,21 @@
|
| # Remove special instructions which are only allowed in special cases.
|
| normal_instruction = one_instruction - special_instruction;
|
|
|
| - # Check if call is properly aligned.
|
| - #
|
| - # For direct call we explicitly encode all variations. For indirect call
|
| - # we accept all the special instructions which ends with register-addressed
|
| - # indirect call.
|
| + # For direct call we explicitly encode all variations.
|
| + direct_call = (data16 REX_RXB? 0xe8 rel16) |
|
| + (REX_WRXB? 0xe8 rel32) |
|
| + (data16 REXW_RXB 0xe8 rel32);
|
| +
|
| + # For indirect call we accept only near register-addressed indirect call.
|
| + indirect_call_register = data16? REX_WRXB? 0xff (opcode_2 & modrm_registers);
|
| +
|
| + # Ragel machine that accepts one call instruction or call superinstruction and
|
| + # checks if call is properly aligned.
|
| call_alignment =
|
| - ((normal_instruction &
|
| - # Direct call
|
| - ((data16 REX_RXB? 0xe8 rel16) |
|
| - (REX_WRXB? 0xe8 rel32) |
|
| - (data16 REXW_RXB 0xe8 rel32))) |
|
| - (special_instruction &
|
| - # Indirect call
|
| - (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* &
|
| - modrm_registers))))
|
| + ((normal_instruction & direct_call)
|
| + # For indirect calls we accept all the special instructions which end with
| 
| + # a register-addressed indirect call.
|
| # Call instruction must aligned to the end of bundle. Previously this was
|
| # strict requirement, today it's just warning to aid with debugging.
|
| @{
|
| @@ -580,6 +604,15 @@
|
| # After that we move instruction_begin and clean all the variables which
|
| # only used in the processing of a single instruction (prefixes, operand
|
| # states and instruction_info_collected).
|
| + # This action calls users callback (if needed) and cleans up validators
|
| + # internal state.
|
| + #
|
| + # We call the user callback either on validation errors or on every
|
| + # instruction, depending on the CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option.
|
| + #
|
| + # After that we move instruction_begin and clean all the variables which
|
| + # are only used in the processing of a single instruction (prefixes, operand
|
| + # states and instruction_info_collected).
|
| action end_of_instruction_cleanup {
|
| /* Call user-supplied callback. */
|
| instruction_end = current_position + 1;
|
| @@ -601,7 +634,7 @@
|
| * Note: we mark start of the next instruction here, not start of the
|
| * current one because memory access check should be able to clear this
|
| * bit when restricted register is used. */
|
| - MarkValidJumpTarget(instruction_begin - data, valid_targets);
|
| + MarkValidJumpTarget(instruction_begin - codeblock, valid_targets);
|
|
|
| /* Clear variables. */
|
| instruction_info_collected = 0;
|
| @@ -630,7 +663,7 @@
|
| }
|
|
|
| # This is main ragel machine: it does 99% of validation work. There are only
|
| - # one thing to do with bundle if this machine accepts the bundle:
|
| + # one thing to do with bundle if this ragel machine accepts the bundle:
|
| # * check for the state of the restricted_register at the end of the bundle.
|
| # It's an error is %rbp or %rsp is restricted at the end of the bundle.
|
| # Additionally if all the bundles are fine you need to check that direct jumps
|
| @@ -645,6 +678,10 @@
|
|
|
| }%%
|
|
|
| +/*
|
| + * The "write data" statement causes Ragel to emit the constant static data
|
| + * needed by the ragel machine.
|
| + */
|
| %% write data;
|
|
|
| enum OperandKind {
|
| @@ -853,7 +890,7 @@
|
| static INLINE void ExpandSuperinstructionBySandboxingBytes(
|
| size_t sandbox_instructions_size,
|
| const uint8_t **instruction_begin,
|
| - const uint8_t *data,
|
| + const uint8_t codeblock[],
|
| bitmap_word *valid_targets) {
|
| *instruction_begin -= sandbox_instructions_size;
|
| /*
|
| @@ -861,7 +898,7 @@
|
| * don't need to mark the beginning of the whole "superinstruction" - that's
|
| * why we move start by one byte and don't change the length.
|
| */
|
| - UnmarkValidJumpTargets((*instruction_begin + 1 - data),
|
| + UnmarkValidJumpTargets((*instruction_begin + 1 - codeblock),
|
| sandbox_instructions_size,
|
| valid_targets);
|
| }
|
| @@ -991,11 +1028,14 @@
|
| uint32_t *instruction_info_collected,
|
| const uint8_t **instruction_begin,
|
| const uint8_t *current_position,
|
| - const uint8_t *data,
|
| + const uint8_t codeblock[],
|
| bitmap_word *valid_targets) {
|
| if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
|
| + 3 /* and */ + 3 /* add */,
|
| + instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| else
|
| *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| }
|
| @@ -1028,11 +1068,14 @@
|
| uint32_t *instruction_info_collected,
|
| const uint8_t **instruction_begin,
|
| const uint8_t *current_position,
|
| - const uint8_t *data,
|
| + const uint8_t codeblock[],
|
| bitmap_word *valid_targets) {
|
| if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
|
| + 3 /* and */ + 3 /* add */,
|
| + instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| else
|
| *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| }
|
| @@ -1071,11 +1114,14 @@
|
| uint32_t *instruction_info_collected,
|
| const uint8_t **instruction_begin,
|
| const uint8_t *current_position,
|
| - const uint8_t *data,
|
| + const uint8_t codeblock[],
|
| bitmap_word *valid_targets) {
|
| if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
|
| + 4 /* and */ + 3 /* add */,
|
| + instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| else
|
| *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| }
|
| @@ -1114,17 +1160,21 @@
|
| uint32_t *instruction_info_collected,
|
| const uint8_t **instruction_begin,
|
| const uint8_t *current_position,
|
| - const uint8_t *data,
|
| + const uint8_t codeblock[],
|
| bitmap_word *valid_targets) {
|
| if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
|
| ExpandSuperinstructionBySandboxingBytes(
|
| - 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
|
| + 4 /* and */ + 3 /* add */,
|
| + instruction_begin,
|
| + codeblock,
|
| + valid_targets);
|
| else
|
| *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| }
|
|
|
|
|
| -Bool ValidateChunkAMD64(const uint8_t *data, size_t size,
|
| +Bool ValidateChunkAMD64(const uint8_t codeblock[],
|
| + size_t size,
|
| uint32_t options,
|
| const NaClCPUFeaturesX86 *cpu_features,
|
| ValidationCallbackFunc user_callback,
|
| @@ -1168,21 +1218,21 @@
|
| /*
|
| * This option is usually used in tests: we will process the whole chunk
|
| * in one pass. Usually each bundle is processed separately which means
|
| - * instructions (and super-instructions) can not cross borders of the bundle.
|
| + * instructions (and "superinstructions") can not cross borders of the bundle.
|
| */
|
| if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)
|
| - end_of_bundle = data + size;
|
| + end_of_bundle = codeblock + size;
|
| else
|
| - end_of_bundle = data + kBundleSize;
|
| + end_of_bundle = codeblock + kBundleSize;
|
|
|
| /*
|
| - * Main loop. Here we process the data array bundle-after-bundle.
|
| + * Main loop. Here we process the codeblock array bundle-after-bundle.
|
| * Ragel-produced DFA does all the checks with one exception: direct jumps.
|
| * It collects the two arrays: valid_targets and jump_dests which are used
|
| * to test direct jumps later.
|
| */
|
| - for (current_position = data;
|
| - current_position < data + size;
|
| + for (current_position = codeblock;
|
| + current_position < codeblock + size;
|
| current_position = end_of_bundle,
|
| end_of_bundle = current_position + kBundleSize) {
|
| /* Start of the instruction being processed. */
|
| @@ -1204,7 +1254,15 @@
|
| uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B;
|
| uint8_t vex_prefix3 = 0x00;
|
|
|
| + /*
|
| + * The "write init" statement causes Ragel to emit initialization code.
|
| + * This should be executed once before the ragel machine is started.
|
| + */
|
| %% write init;
|
| + /*
|
| + * The "write exec" statement causes Ragel to emit the ragel machine's
|
| + * execution code.
|
| + */
|
| %% write exec;
|
|
|
| /*
|
| @@ -1227,8 +1285,12 @@
|
| * Check the direct jumps. All the targets from jump_dests must be in
|
| * valid_targets.
|
| */
|
| - result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests,
|
| - user_callback, callback_data);
|
| + result &= ProcessInvalidJumpTargets(codeblock,
|
| + size,
|
| + valid_targets,
|
| + jump_dests,
|
| + user_callback,
|
| + callback_data);
|
|
|
| /* We only use malloc for a large code sequences */
|
| if (jump_dests != &jump_dests_small) free(jump_dests);
|
|
|