| Index: src/trusted/validator_ragel/validator_x86_64.rl
|
| ===================================================================
|
| --- src/trusted/validator_ragel/validator_x86_64.rl (revision 9944)
|
| +++ src/trusted/validator_ragel/validator_x86_64.rl (working copy)
|
| @@ -4,6 +4,14 @@
|
| * found in the LICENSE file.
|
| */
|
|
|
| +/*
|
| + * This is the core of amd64-mode validator. Please note that this file
|
| + * combines ragel machine description and C language actions. Please read
|
| + * validator_internals.html first to understand how the whole thing is built:
|
| + * it explains how the byte sequences are constructed, what constructs like
|
| + * “@{}” or “REX_WRX?” mean, etc.
|
| + */
|
| +
|
| #include <assert.h>
|
| #include <errno.h>
|
| #include <stddef.h>
|
| @@ -11,7 +19,7 @@
|
| #include <stdlib.h>
|
| #include <string.h>
|
|
|
| -#include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h"
|
| +#include "native_client/src/trusted/validator_ragel/validator_internal.h"
|
|
|
| %%{
|
| machine x86_64_validator;
|
| @@ -49,13 +57,20 @@
|
| "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| include immediate_fields_parsing_amd64
|
| "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| + # rel8 actions are used in relative jumps with 8-bit offset.
|
| action rel8_operand {
|
| rel8_operand(current_position + 1, data, jump_dests, size,
|
| &instruction_info_collected);
|
| }
|
| + # rel16 actions are used in relative jumps with 16-bit offset.
|
| + #
|
| + # Such instructions should not be included in the validator's DFA, but we can
|
| + # not just exclude them because they are referenced in relative_fields_parsing
|
| + # ragel machine. Ensure compilation error in case of usage.
|
| action rel16_operand {
|
| #error rel16_operand should never be used in nacl
|
| }
|
| + # rel32 actions are used in relative calls and jumps with 32-bit offset.
|
| action rel32_operand {
|
| rel32_operand(current_position + 1, data, jump_dests, size,
|
| &instruction_info_collected);
|
| @@ -79,25 +94,25 @@
|
| }
|
|
|
| action process_0_operands {
|
| - process_0_operands(&restricted_register, &instruction_info_collected);
|
| + Process0Operands(&restricted_register, &instruction_info_collected);
|
| }
|
| action process_1_operand {
|
| - process_1_operand(&restricted_register, &instruction_info_collected,
|
| - rex_prefix, operand_states);
|
| + Process1Operand(&restricted_register, &instruction_info_collected,
|
| + rex_prefix, operand_states);
|
| }
|
| action process_1_operand_zero_extends {
|
| - process_1_operand_zero_extends(&restricted_register,
|
| - &instruction_info_collected, rex_prefix,
|
| - operand_states);
|
| + Process1OperandZeroExtends(&restricted_register,
|
| + &instruction_info_collected, rex_prefix,
|
| + operand_states);
|
| }
|
| action process_2_operands {
|
| - process_2_operands(&restricted_register, &instruction_info_collected,
|
| - rex_prefix, operand_states);
|
| + Process2Operands(&restricted_register, &instruction_info_collected,
|
| + rex_prefix, operand_states);
|
| }
|
| action process_2_operands_zero_extends {
|
| - process_2_operands_zero_extends(&restricted_register,
|
| - &instruction_info_collected, rex_prefix,
|
| - operand_states);
|
| + Process2OperandsZeroExtends(&restricted_register,
|
| + &instruction_info_collected, rex_prefix,
|
| + operand_states);
|
| }
|
|
|
| include decode_x86_64 "validator_x86_64_instruction.rl";
|
| @@ -105,29 +120,46 @@
|
| data16condrep = (data16 | condrep data16 | data16 condrep);
|
| data16rep = (data16 | rep data16 | data16 rep);
|
|
|
| - # Special %rbp modifications without required sandboxing
|
| + # Special %rbp modifications—the ones which don't need sandboxing.
|
| + #
|
| + # Note that there are two different opcodes for “mov”: “mov” with opcode
|
| + # “0x89” moves from “A” to “B” while “mov” with opcode “0x8b” moves from
|
| + # “B” to “A”.
|
| rbp_modifications =
|
| (b_0100_10x0 0x89 0xe5) | # mov %rsp,%rbp
|
| - (b_0100_10x0 0x8b 0xec) # | mov %rsp,%rbp
|
| - #(b_0100_1xx0 0x81 0xe5 any{3} (0x80 .. 0xff)) | # and $XXX,%rbp
|
| - #(b_0100_1xx0 0x83 0xe5 (0x80 .. 0xff)) # and $XXX,%rbp
|
| + (b_0100_10x0 0x8b 0xec) # mov %rsp,%rbp
|
| @process_0_operands;
|
|
|
| - # Special instructions used for %rbp sandboxing
|
| + # Special instructions used for %rbp sandboxing.
|
| + #
|
| + # This is the “second half” of the %rbp sandboxing. Any zero-extending
|
| + # instruction which stores the data in %ebp can be the first part, but unlike
|
| + # the situation with other “normal” registers you can not just write to
|
| + # %ebp and continue: such activity MUST restore the status quo immediately
|
| + # via one of these instructions.
|
| rbp_sandboxing =
|
| - (b_0100_11x0 0x01 0xfd | # add %r15,%rbp
|
| - b_0100_10x1 0x03 0xef | # add %r15,%rbp
|
| - 0x49 0x8d 0x2c 0x2f | # lea (%r15,%rbp,1),%rbp
|
| - 0x4a 0x8d 0x6c 0x3d 0x00) # lea 0x0(%rbp,%r15,1),%rbp
|
| + (b_0100_11x0 0x01 0xfd | # add %r15,%rbp
|
| + b_0100_10x1 0x03 0xef | # add %r15,%rbp
|
| + 0x49 0x8d 0x2c 0x2f | # lea (%r15,%rbp,1),%rbp
|
| + 0x4a 0x8d 0x6c 0x3d 0x00) # lea 0x0(%rbp,%r15,1),%rbp
|
| + # “Normal” instructions detect an error when confronted with restricted
|
| + # register %rbp. These instructions require this state instead.
|
| + #
|
| + # Check this precondition and mark the beginning of the instruction as
|
| + # an invalid jump target.
|
| @{ if (restricted_register == REG_RBP)
|
| instruction_info_collected |= RESTRICTED_REGISTER_USED;
|
| else
|
| instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;
|
| restricted_register = NO_REG;
|
| - BitmapClearBit(valid_targets, (instruction_start - data));
|
| + MakeInvalidJumpTarget((instruction_start - data), valid_targets);
|
| };
|
|
|
| - # Special %rbp modifications without required sandboxing
|
| + # Special %rsp modifications—the ones which don't need sandboxing.
|
| + #
|
| + # Note that there are two different opcodes for “mov”: “mov” with opcode
|
| + # “0x89” moves from “A” to “B” while “mov” with opcode “0x8b” moves from
|
| + # “B” to “A”.
|
| rsp_modifications =
|
| (b_0100_10x0 0x89 0xec) | # mov %rbp,%rsp
|
| (b_0100_10x0 0x8b 0xe5) | # mov %rbp,%rsp
|
| @@ -137,115 +169,177 @@
|
| (b_0100_1000 0x83 0xe4 (0x80 .. 0xff)) # and $XXX,%rsp
|
| @process_0_operands;
|
|
|
| - # Special instructions used for %rbp sandboxing
|
| + # Special instructions used for %rsp sandboxing.
|
| + #
|
| + # This is the “second half” of the %rsp sandboxing. Any zero-extending
|
| + # instruction which stores the data in %esp can be the first part, but unlike
|
| + # the situation with other “normal” registers you can not just write to
|
| + # %esp and continue: such activity MUST restore the status quo immediately
|
| + # via one of these instructions.
|
| rsp_sandboxing =
|
| - (b_0100_11x0 0x01 0xfc | # add %r15,%rsp
|
| - b_0100_10x1 0x03 0xe7 | # add %r15,%rbp
|
| - 0x4a 0x8d 0x24 0x3c) # lea (%rsp,%r15,1),%rsp
|
| + (b_0100_11x0 0x01 0xfc | # add %r15,%rsp
|
| + b_0100_10x1 0x03 0xe7 | # add %r15,%rsp
|
| + 0x4a 0x8d 0x24 0x3c) # lea (%rsp,%r15,1),%rsp
|
| + # “Normal” instructions detect an error when confronted with restricted
|
| + # register %rsp. These instructions require this state instead.
|
| + #
|
| + # Check this precondition and mark the beginning of the instruction as
|
| + # an invalid jump target.
|
| @{ if (restricted_register == REG_RSP)
|
| instruction_info_collected |= RESTRICTED_REGISTER_USED;
|
| else
|
| instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;
|
| restricted_register = NO_REG;
|
| - BitmapClearBit(valid_targets, (instruction_start - data));
|
| + MakeInvalidJumpTarget((instruction_start - data), valid_targets);
|
| };
|
|
|
| - # naclcall or nacljmp. Note: first "and $~0x1f, %eXX" is a normal instruction
|
| - # and as such will detect case where %rbp/%rsp is illegally modified.
|
| + # naclcall or nacljmp. These are three-instruction indirection-jump sequences.
|
| + # and $~0x1f, %eXX
|
| + # add RBASE, %rXX
|
| + # jmpq *%rXX (or: callq *%rXX)
|
| + # Note: first "and $~0x1f, %eXX" is a normal instruction and as such will
|
| + # detect case where %rbp/%rsp is illegally modified when this machine will be
|
| + # combined with normal_instruction machine.
|
| + #
|
| + # There are a number of variants present which differ by the REX prefix usage:
|
| + # we need to make sure “%eXX” in “and”, “%rXX” in “add”, and “%rXX” in “jmpq”
|
| + # or “callq” is the same register and it's much simpler to do if one single
|
| + # action handles only fixed number of bytes.
|
| + #
|
| + # Additional complication arises because x86-64 contains two different “add”
|
| + # instructions: with “0x01” and “0x03” opcode. They differ in the direction
|
| + # used: both can add “A” and “B” but one of them stores the result in “A” and
|
| + # other stores the result in “B” (see AMD/Intel manual for clarification).
|
| + # Both should be allowed.
|
| + #
|
| + # REGISTER USAGE ABBREVIATIONS:
|
| + # E86: legacy ia32 registers (all eight: %eax to %edi)
|
| + # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi)
|
| + # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d)
|
| + # R64: new amd64 registers (only seven: %r8 to %r14)
|
| + # RBASE: %r15 (used as “base of untrusted world” in NaCl for amd64)
|
| naclcall_or_nacljmp =
|
| - # and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi
|
| - (0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0
|
| - # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi
|
| - b_0100_11x0 0x01 (0xf8|0xf9|0xfa|0xfb|0xfc|0xfd|0xfe|0xff)
|
| - # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi
|
| - ((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) |
|
| - # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi
|
| - (REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7))))
|
| - @{
|
| - instruction_start -= 6;
|
| - if (RMFromModRM(instruction_start[1]) !=
|
| - RMFromModRM(instruction_start[5]) ||
|
| - RMFromModRM(instruction_start[1]) != RMFromModRM(*current_position))
|
| - instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 3);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 6);
|
| - restricted_register = NO_REG;
|
| - } |
|
| + # This block encodes call and jump superinstructions of the form:
|
| + # 0: 83 e_ e0 and $~0x1f,E86
|
| + # 3: 4_ 01 f_ add RBASE,R86
|
| + # 6: ff e_ jmpq *R86
|
| + #### INSTRUCTION ONE (three bytes)
|
| + # and $~0x1f, E86
|
| + (0x83 b_11_100_xxx 0xe0
|
| + #### INSTRUCTION TWO (three bytes)
|
| + # add RBASE, R86 (0x01 opcode)
|
| + b_0100_11x0 0x01 b_11_111_xxx
|
| + #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
|
| + # callq R86
|
| + ((REX_WRX? 0xff b_11_010_xxx) |
|
| + #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
|
| + # jmpq R86
|
| + (REX_WRX? 0xff b_11_100_xxx)))
|
| + @{
|
| + ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected,
|
| + &instruction_start, current_position,
|
| + data, valid_targets);
|
| + } |
|
|
|
| - # and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi
|
| - (0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0
|
| - # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi
|
| - b_0100_10x1 0x03 (0xc7|0xcf|0xd7|0xdf|0xe7|0xef|0xf7|0xff)
|
| - # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi
|
| - ((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) |
|
| - # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi
|
| - (REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7))))
|
| - @{
|
| - instruction_start -= 6;
|
| - if (RMFromModRM(instruction_start[1]) !=
|
| - RegFromModRM(instruction_start[5]) ||
|
| - RMFromModRM(instruction_start[1]) != RMFromModRM(*current_position))
|
| - instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 3);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 6);
|
| - restricted_register = NO_REG;
|
| - } |
|
| + # This block encodes call and jump superinstructions of the form:
|
| + # 0: 83 e_ e0 and $~0x1f,E86
|
| + # 3: 4_ 03 f_ add RBASE,R86
|
| + # 6: ff e_ jmpq *R86
|
| + #### INSTRUCTION ONE (three bytes)
|
| + # and $~0x1f, E86
|
| + (0x83 b_11_100_xxx 0xe0
|
| + #### INSTRUCTION TWO (three bytes)
|
| + # add RBASE, R86 (0x03 opcode)
|
| + b_0100_10x1 0x03 b_11_xxx_111
|
| + #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
|
| + # callq R86
|
| + ((REX_WRX? 0xff b_11_010_xxx) |
|
| + #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
|
| + # jmpq R86
|
| + (REX_WRX? 0xff b_11_100_xxx)))
|
| + @{
|
| + ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected,
|
| + &instruction_start, current_position,
|
| + data, valid_targets);
|
| + } |
|
|
|
| - # rex.R?X? and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi
|
| - ((REX_RX 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0
|
| - # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi
|
| - b_0100_11x0 0x01 (0xf8|0xf9|0xfa|0xfb|0xfc|0xfd|0xfe|0xff)
|
| - # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi
|
| - ((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) |
|
| - # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi
|
| - (REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7)))) |
|
| + # This block encodes call and jump superinstructions of the form:
|
| + # 0: 4_ 83 e_ e0 and $~0x1f,E86
|
| + # 4: 4_ 01 f_ add RBASE,R86
|
| + # 7: ff e_ jmpq *R86
|
| + #### INSTRUCTION ONE (four bytes)
|
| + # and $~0x1f, E86
|
| + ((REX_RX 0x83 b_11_100_xxx 0xe0
|
| + #### INSTRUCTION TWO (three bytes)
|
| + # add RBASE, R86 (0x01 opcode)
|
| + b_0100_11x0 0x01 b_11_111_xxx
|
| + #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
|
| + # callq R86
|
| + ((REX_WRX? 0xff b_11_010_xxx) |
|
| + #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
|
| + # jmpq R86
|
| + (REX_WRX? 0xff b_11_100_xxx))) |
|
|
|
| - # and $~0x1f, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d
|
| - (b_0100_0xx1 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6) 0xe0
|
| - # add %r15, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d
|
| - b_0100_11x1 0x01 (0xf8|0xf9|0xfa|0xfb|0xfc|0xfd|0xfe)
|
| - # callq %r8/%r9/%r10/%r11/%r12/%r13/%r14
|
| - ((b_0100_xxx1 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6)) |
|
| - # jmpq %r8/%r9/%r10/%r11/%r12/%r13/%r14
|
| - (b_0100_xxx1 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6)))))
|
| - @{
|
| - instruction_start -= 7;
|
| - if (RMFromModRM(instruction_start[2]) !=
|
| - RMFromModRM(instruction_start[6]) ||
|
| - RMFromModRM(instruction_start[2]) != RMFromModRM(*current_position))
|
| - instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 4);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 7);
|
| - restricted_register = NO_REG;
|
| - } |
|
| + # This block encodes call and jump superinstructions of the form:
|
| + # 0: 4_ 83 e_ e0 and $~0x1f,E64
|
| + # 4: 4_ 01 f_ add RBASE,R64
|
| + # 7: 4_ ff e_ jmpq *R64
|
| + #### INSTRUCTION ONE (four bytes)
|
| + # and $~0x1f, E64
|
| + (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
|
| + #### INSTRUCTION TWO (three bytes)
|
| + # add RBASE, R64 (0x01 opcode)
|
| + b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111)
|
| + #### INSTRUCTION THREE: call (three bytes)
|
| + # callq R64
|
| + ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
|
| + #### INSTRUCTION THREE: jmp (three bytes)
|
| + # jmpq R64
|
| + (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
|
| + @{
|
| + ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected,
|
| + &instruction_start, current_position,
|
| + data, valid_targets);
|
| + } |
|
|
|
| - # rex.R?X? and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi
|
| - ((REX_RX 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0
|
| - # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi
|
| - b_0100_10x1 0x03 (0xc7|0xcf|0xd7|0xdf|0xe7|0xef|0xf7|0xff)
|
| - # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi
|
| - ((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) |
|
| - # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi
|
| - (REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7)))) |
|
| + # This block encodes call and jump superinstructions of the form:
|
| + # 0: 4_ 83 e_ e0 and $~0x1f,E86
|
| + # 4: 4_ 03 f_ add RBASE,R86
|
| + # 7: ff e_ jmpq *R86
|
| + #### INSTRUCTION ONE (four bytes)
|
| + # and $~0x1f, E86
|
| + ((REX_RX 0x83 b_11_100_xxx 0xe0
|
| + #### INSTRUCTION TWO (three bytes)
|
| + # add RBASE, R86 (0x03 opcode)
|
| + b_0100_10x1 0x03 b_11_xxx_111
|
| + #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
|
| + # callq R86
|
| + ((REX_WRX? 0xff b_11_010_xxx) |
|
| + #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
|
| + # jmpq R86
|
| + (REX_WRX? 0xff b_11_100_xxx))) |
|
|
|
| - # and $~0x1f, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d
|
| - (b_0100_0xx1 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6) 0xe0
|
| - # add %r15, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d
|
| - b_0100_11x1 0x03 (0xc7|0xcf|0xd7|0xdf|0xe7|0xef|0xf7)
|
| - # callq %r8/%r9/%r10/%r11/%r12/%r13/%r14
|
| - ((b_0100_xxx1 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6)) |
|
| - # jmpq %r8/%r9/%r10/%r11/%r12/%r13/%r14
|
| - (b_0100_xxx1 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6)))))
|
| - @{
|
| - instruction_start -= 7;
|
| - if (RMFromModRM(instruction_start[2]) !=
|
| - RegFromModRM(instruction_start[6]) ||
|
| - RMFromModRM(instruction_start[2]) != RMFromModRM(*current_position))
|
| - instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 4);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 7);
|
| - restricted_register = NO_REG;
|
| - };
|
| + # This block encodes call and jump superinstructions of the form:
|
| + # 0: 4_ 83 e_ e0 and $~0x1f,E64
|
| + # 4: 4_ 03 f_ add RBASE,R64
|
| + # 7: 4_ ff e_ jmpq *R64
|
| + #### INSTRUCTION ONE (four bytes)
|
| + # and $~0x1f, E64
|
| + (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
|
| + #### INSTRUCTION TWO (three bytes)
|
| + # add RBASE, R64 (0x03 opcode)
|
| + b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111)
|
| + #### INSTRUCTION THREE: call (three bytes)
|
| + # callq R64
|
| + ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
|
| + #### INSTRUCTION THREE: jmp (three bytes)
|
| + # jmpq R64
|
| + (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
|
| + @{
|
| + ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected,
|
| + &instruction_start, current_position,
|
| + data, valid_targets);
|
| + };
|
|
|
| # EMMS/SSE2/AVX instructions which have implicit %ds:(%rsi) operand
|
| # maskmovq %mmX,%mmY
|
| @@ -267,7 +361,7 @@
|
| string_instruction_rsi_no_rdi =
|
| (rep? 0xac | # lods %ds:(%rsi),%al
|
| data16rep 0xad | # lods %ds:(%rsi),%ax
|
| - rep? REXW_NONE? 0xad) ; # lods %ds:(%rsi),%eax/%rax
|
| + rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax
|
|
|
| # String instructions which use only %ds:(%rdi)
|
| string_instruction_rdi_no_rsi =
|
| @@ -277,7 +371,7 @@
|
|
|
| rep? 0xaa | # stos %al,%es:(%rdi)
|
| data16rep 0xab | # stos %ax,%es:(%rdi)
|
| - rep? REXW_NONE? 0xab ; # stos %eax/%rax,%es:(%rdi)
|
| + rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi)
|
|
|
| # String instructions which use both %ds:(%rsi) and %ds:(%rdi)
|
| string_instruction_rsi_rdi =
|
| @@ -287,107 +381,107 @@
|
|
|
| rep? 0xa4 | # movsb %es:(%rdi),%ds:(%rsi)
|
| data16rep 0xa5 | # movsw %es:(%rdi),%ds:(%rsi)
|
| - rep? REXW_NONE? 0xa5 ; # movs[lq] %es:(%rdi),%ds:(%rsi)
|
| + rep? REXW_NONE? 0xa5; # movs[lq] %es:(%rdi),%ds:(%rsi)
|
|
|
| + # Superinstructions which handle instructions which require sandboxed %rsi.
|
| + #
|
| + # There are two variants which handle spurious REX prefixes.
|
| + #
|
| + # Note that both “0x89 0xf6” and “0x8b 0xf6” encode “mov %esi,%esi”:
|
| + # “mov” with opcode “0x89” moves from “A” to “B” while “mov” with opcode
|
| + # “0x8b” moves from “B” to “A” but when “A” and “B” happen to denote the
|
| + # same register there is no functional difference between these opcodes.
|
| sandbox_instruction_rsi_no_rdi =
|
| - (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
|
| - 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi
|
| + (0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| + 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
|
| string_instruction_rsi_no_rdi
|
| @{
|
| - instruction_start -= 6;
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 2);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 6);
|
| - restricted_register = NO_REG;
|
| + SandboxRxiSuperInstNoRexOnMov(&instruction_start, data, valid_targets);
|
| } |
|
|
|
| - REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
|
| - 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi
|
| + REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| + 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
|
| string_instruction_rsi_no_rdi
|
| @{
|
| - instruction_start -= 7;
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 3);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 7);
|
| - restricted_register = NO_REG;
|
| + SandboxRxiSuperInstWithRexOnMov(&instruction_start, data, valid_targets);
|
| };
|
|
|
| + # Superinstructions which handle instructions which require sandboxed %rdi.
|
| + #
|
| + # There are two variants which handle spurious REX prefixes.
|
| + #
|
| + # Note that both “0x89 0xff” and “0x8b 0xff” encode “mov %edi,%edi”:
|
| + # “mov” with opcode “0x89” moves from “A” to “B” while “mov” with opcode
|
| + # “0x8b” moves from “B” to “A” but when “A” and “B” happen to denote the
|
| + # same register there is no functional difference between these opcodes.
|
| sandbox_instruction_rdi_no_rsi =
|
| - (0x89 | 0x8b) 0xff . # mov %edi,%edi
|
| - 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
|
| + (0x89 | 0x8b) 0xff # mov %edi,%edi
|
| + 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
|
| (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
|
| @{
|
| - instruction_start -= 6;
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 2);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 6);
|
| - restricted_register = NO_REG;
|
| + SandboxRxiSuperInstNoRexOnMov(&instruction_start, data, valid_targets);
|
| } |
|
|
|
| - REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
|
| - 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
|
| + REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
|
| + 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
|
| (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
|
| @{
|
| - instruction_start -= 7;
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 3);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 7);
|
| - restricted_register = NO_REG;
|
| + SandboxRxiSuperInstWithRexOnMov(&instruction_start, data, valid_targets);
|
| };
|
|
|
|
|
| - # String instructions which use both %ds:(%rsi) and %ds:(%rdi)
|
| + # Superinstruction which handle instructions which require both sandboxed %rsi
|
| + # and sandboxed %rdi.
|
| + #
|
| + # There are four variants which handle spurious REX prefixes.
|
| + #
|
| + # Note that both “0x89 0xf6” and “0x8b 0xf6” encode “mov %esi,%esi” while both
|
| + # “0x89 0xff” and “0x8b 0xff” encode “mov %edi,%edi”: “mov” with opcode “0x89”
|
| + # moves from “A” to “B” while “mov” with opcode “0x8b” moves from “B” to “A”
|
| + # but when “A” and “B” happen to denote the same register there is no
|
| + # functional difference between these opcodes.
|
| + #
|
| + # Note: we call SandboxRxiSuperInst in actions here twice because we have two
|
| + # sandboxings here - one for %rsi and one for %rdi.
|
| sandbox_instruction_rsi_rdi =
|
| - (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
|
| - 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi
|
| - (0x89 | 0x8b) 0xff . # mov %edi,%edi
|
| - 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
|
| + (0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| + 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
|
| + (0x89 | 0x8b) 0xff # mov %edi,%edi
|
| + 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
|
| string_instruction_rsi_rdi
|
| @{
|
| - instruction_start -= 12;
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 2);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 6);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 8);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 12);
|
| - restricted_register = NO_REG;
|
| + SandboxRxiSuperInstNoRexOnMov(&instruction_start, data, valid_targets);
|
| + SandboxRxiSuperInstNoRexOnMov(&instruction_start, data, valid_targets);
|
| } |
|
|
|
| - (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
|
| - 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi
|
| - REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
|
| - 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
|
| + (0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| + 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
|
| + REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
|
| + 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
|
| string_instruction_rsi_rdi
|
| @{
|
| - instruction_start -= 13;
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 2);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 6);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 9);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 13);
|
| - restricted_register = NO_REG;
|
| + SandboxRxiSuperInstWithRexOnMov(&instruction_start, data, valid_targets);
|
| + SandboxRxiSuperInstNoRexOnMov(&instruction_start, data, valid_targets);
|
| } |
|
|
|
| - REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
|
| - 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi
|
| - (0x89 | 0x8b) 0xff . # mov %edi,%edi
|
| - 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
|
| + REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| + 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
|
| + (0x89 | 0x8b) 0xff # mov %edi,%edi
|
| + 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
|
| string_instruction_rsi_rdi
|
| @{
|
| - instruction_start -= 13;
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 3);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 7);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 9);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 13);
|
| - restricted_register = NO_REG;
|
| + SandboxRxiSuperInstNoRexOnMov(&instruction_start, data, valid_targets);
|
| + SandboxRxiSuperInstWithRexOnMov(&instruction_start, data, valid_targets);
|
| } |
|
|
|
| - REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
|
| - 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi
|
| - REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
|
| - 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
|
| + REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| + 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
|
| + REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
|
| + 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
|
| string_instruction_rsi_rdi
|
| @{
|
| - instruction_start -= 14;
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 3);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 7);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 10);
|
| - BitmapClearBit(valid_targets, (instruction_start - data) + 14);
|
| - restricted_register = NO_REG;
|
| + SandboxRxiSuperInstWithRexOnMov(&instruction_start, data, valid_targets);
|
| + SandboxRxiSuperInstWithRexOnMov(&instruction_start, data, valid_targets);
|
| };
|
|
|
| special_instruction =
|
| @@ -399,6 +493,9 @@
|
| sandbox_instruction_rsi_no_rdi |
|
| sandbox_instruction_rdi_no_rsi |
|
| sandbox_instruction_rsi_rdi)
|
| + # Mark the instruction as special—currently this information is used only in
|
| + # tests, but in the future we may use it for dynamic code modification
|
| + # support.
|
| @{
|
| instruction_info_collected |= SPECIAL_INSTRUCTION;
|
| };
|
| @@ -406,7 +503,10 @@
|
| # Remove special instructions which are only allowed in special cases.
|
| normal_instruction = one_instruction - special_instruction;
|
|
|
| - # Check if call is properly aligned
|
| + # Check if call is properly aligned.
|
| + #
|
| + # For direct call we explicitly encode all variations. For indirect call
|
| + # we accept all the special instructions which end with an indirect call.
|
| call_alignment =
|
| ((normal_instruction &
|
| # Direct call
|
| @@ -417,6 +517,8 @@
|
| # Indirect call
|
| (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* &
|
| (modrm_memory | modrm_registers)))))
|
| + # Call instruction must be aligned to the end of bundle. Previously this was
|
| + # strict requirement, today it's just warning to aid with debugging.
|
| @{
|
| if (((current_position - data) & kBundleMask) != kBundleMask)
|
| instruction_info_collected |= BAD_CALL_ALIGNMENT;
|
| @@ -424,9 +526,18 @@
|
|
|
|
|
| main := ((call_alignment | normal_instruction | special_instruction)
|
| + # Beginning of the instruction is always valid target for jump. If this
|
| + # instruction is, in fact, part of the superinstruction then we'll clear
|
| + # that bit later.
|
| >{
|
| - BitmapSetBit(valid_targets, current_position - data);
|
| + MakeJumpTargetValid(current_position - data, valid_targets);
|
| }
|
| + # Here we call the user callback if there are validation errors or if the
|
| + # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used.
|
| + #
|
| + # After that we move instruction_start and clean all the variables which
|
| + # only used in the processing of a single instruction (prefixes, operand
|
| + # states and instruction_info_collected).
|
| @{
|
| if ((instruction_info_collected & VALIDATION_ERRORS_MASK) ||
|
| (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) {
|
| @@ -442,6 +553,7 @@
|
| instruction_start = current_position + 1;
|
| instruction_info_collected = 0;
|
| SET_REX_PREFIX(FALSE);
|
| + /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
|
| SET_VEX_PREFIX2(0xe0);
|
| SET_VEX_PREFIX3(0x00);
|
| operand_states = 0;
|
|
|