| Index: src/trusted/validator_ragel/unreviewed/validator_x86_64.rl
|
| ===================================================================
|
| --- src/trusted/validator_ragel/unreviewed/validator_x86_64.rl (revision 10976)
|
| +++ src/trusted/validator_ragel/unreviewed/validator_x86_64.rl (working copy)
|
| @@ -1,1238 +0,0 @@
|
| -/*
|
| - * Copyright (c) 2012 The Native Client Authors. All rights reserved.
|
| - * Use of this source code is governed by a BSD-style license that can be
|
| - * found in the LICENSE file.
|
| - */
|
| -
|
| -/*
|
| - * This is the core of amd64-mode validator. Please note that this file
|
| - * combines ragel machine description and C language actions. Please read
|
| - * validator_internals.html first to understand how the whole thing is built:
|
| - * it explains how the byte sequences are constructed, what constructs like
|
| - * "@{}" or "REX_WRX?" mean, etc.
|
| - */
|
| -
|
| -#include <assert.h>
|
| -#include <errno.h>
|
| -#include <stddef.h>
|
| -#include <stdio.h>
|
| -#include <stdlib.h>
|
| -#include <string.h>
|
| -
|
| -#include "native_client/src/trusted/validator_ragel/bitmap.h"
|
| -#include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h"
|
| -
|
| -%%{
|
| - machine x86_64_validator;
|
| - alphtype unsigned char;
|
| - variable p current_position;
|
| - variable pe end_of_bundle;
|
| - variable eof end_of_bundle;
|
| - variable cs current_state;
|
| -
|
| - include byte_machine "byte_machines.rl";
|
| -
|
| - include prefixes_parsing_validator
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include rex_actions
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include rex_parsing
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include vex_actions_amd64
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include vex_parsing_amd64
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include displacement_fields_actions
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include displacement_fields_parsing
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include modrm_actions_amd64
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include modrm_parsing
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include operand_actions_amd64
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include immediate_fields_actions
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include immediate_fields_parsing_amd64
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include relative_fields_validator_actions
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include relative_fields_parsing
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| - include cpuid_actions
|
| - "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
|
| -
|
| - action check_access {
|
| - CheckAccess(instruction_begin - data, base, index, restricted_register,
|
| - valid_targets, &instruction_info_collected);
|
| - }
|
| -
|
| - # Action which marks last byte as not immediate. Most 3DNow! instructions,
|
| - # some AVX and XOP instructions have this proerty. It's referenced by
|
| - # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl"
|
| - # file.
|
| - action last_byte_is_not_immediate {
|
| - instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE;
|
| - }
|
| -
|
| - action modifiable_instruction {
|
| - instruction_info_collected |= MODIFIABLE_INSTRUCTION;
|
| - }
|
| -
|
| - action process_0_operands {
|
| - Process0Operands(&restricted_register, &instruction_info_collected);
|
| - }
|
| - action process_1_operand {
|
| - Process1Operand(&restricted_register, &instruction_info_collected,
|
| - rex_prefix, operand_states);
|
| - }
|
| - action process_1_operand_zero_extends {
|
| - Process1OperandZeroExtends(&restricted_register,
|
| - &instruction_info_collected, rex_prefix,
|
| - operand_states);
|
| - }
|
| - action process_2_operands {
|
| - Process2Operands(&restricted_register, &instruction_info_collected,
|
| - rex_prefix, operand_states);
|
| - }
|
| - action process_2_operands_zero_extends {
|
| - Process2OperandsZeroExtends(&restricted_register,
|
| - &instruction_info_collected, rex_prefix,
|
| - operand_states);
|
| - }
|
| -
|
| - include decode_x86_64 "validator_x86_64_instruction.rl";
|
| -
|
| - # Special %rbp modifications - the ones which don't need a sandboxing.
|
| - #
|
| - # Note that there are two different opcodes for "mov": in x86-64 there are two
|
| - # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
|
| - # from REG field to RM or in the other direction thus there are two encodings
|
| - # for the register-to-register move.
|
| - rbp_modifications =
|
| - (b_0100_10x0 0x89 0xe5 | # mov %rsp,%rbp
|
| - b_0100_10x0 0x8b 0xec) # mov %rsp,%rbp
|
| - @process_0_operands;
|
| -
|
| - # Special instructions used for %rbp sandboxing.
|
| - #
|
| - # This is the "second half" of the %rbp sandboxing. Any zero-extending
|
| - # instruction which stores the data in %ebp can be first half, but unlike
|
| - # the situation with other "normal" registers you can not just write to
|
| - # %ebp and continue: such activity MUST restore the status quo immediately
|
| - # via one of these instructions.
|
| - rbp_sandboxing =
|
| - (b_0100_11x0 0x01 0xfd | # add %r15,%rbp
|
| - b_0100_10x1 0x03 0xef | # add %r15,%rbp
|
| - # Note that unlike %rsp case, there is no 'lea (%rbp,%r15,1),%rbp'
|
| - # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp').
|
| - 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp
|
| - 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp
|
| - # Note: restricted_register keeps the restricted register as explained in
|
| - # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
|
| - #
|
| - # "Normal" instructions can not be used in a place where %rbp is restricted.
|
| - # But since these instructions are "second half" of the %rbp sandboxing they
|
| - # can be used *only* when %rbp is restricted.
|
| - #
|
| - # That is (normal instruction):
|
| - # mov %eax,%ebp
|
| - # mov %esi,%edi <- Error: %ebp is restricted
|
| - # vs
|
| - # mov %esi,%edi
|
| - # add %r15,%rbp <- Error: %ebp is *not* restricted
|
| - # vs
|
| - # mov %eax,%ebp
|
| - # add %r15,%rbp <- Ok: %rbp is restricted as it should be
|
| - #
|
| - # Check this precondition and mark the beginning of the instruction as
|
| - # invalid jump for target.
|
| - @{ if (restricted_register == REG_RBP)
|
| - instruction_info_collected |= RESTRICTED_REGISTER_USED;
|
| - else
|
| - instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;
|
| - restricted_register = NO_REG;
|
| - UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
|
| - };
|
| -
|
| - # Special %rsp modifications - the ones which don't need a sandboxing.
|
| - #
|
| - # Note that there are two different opcodes for "mov": in x86-64 there are two
|
| - # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
|
| - # from REG field to RM or in the other direction thus there are two encodings
|
| - # for the register-to-register move.
|
| - rsp_modifications =
|
| - (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp
|
| - b_0100_10x0 0x8b 0xe5 | # mov %rbp,%rsp
|
| - # Superfluous bits are not supported:
|
| - # http://code.google.com/p/nativeclient/issues/detail?id=3012
|
| - b_0100_1000 0x83 0xe4 (0x80 .. 0xff)) # and $XXX,%rsp
|
| - @process_0_operands;
|
| -
|
| - # Special instructions used for %rsp sandboxing.
|
| - #
|
| - # This is the "second half" of the %rsp sandboxing. Any zero-extending
|
| - # instruction which stores the data in %esp can be first half, but unlike
|
| - # the situation with other "normal" registers you can not just write to
|
| - # %esp and continue: such activity MUST restore the status quo immediately
|
| - # via one of these instructions.
|
| - rsp_sandboxing =
|
| - (b_0100_11x0 0x01 0xfc | # add %r15,%rsp
|
| - b_0100_10x1 0x03 0xe7 | # add %r15,%rsp
|
| - # OR can be used as well, see
|
| - # http://code.google.com/p/nativeclient/issues/detail?id=3070
|
| - b_0100_11x0 0x09 0xfc | # or %r15,%rsp
|
| - b_0100_10x1 0x0b 0xe7 | # or %r15,%rsp
|
| - 0x4a 0x8d 0x24 0x3c | # lea (%rsp,%r15,1),%rsp
|
| - 0x4a 0x8d 0x64 0x3c 0x00 | # lea 0x00(%rsp,%r15,1),%rsp
|
| - 0x4a 0x8d 0xa4 0x3c 0x00 0x00 0x00 0x00) # lea 0x00000000(%rsp,%r15,1),%rsp
|
| - # Note: restricted_register keeps the restricted register as explained in
|
| - # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
|
| - #
|
| - # "Normal" instructions can not be used in a place where %rsp is restricted.
|
| - # But since these instructions are "second half" of the %rsp sandboxing they
|
| - # can be used *only* when %rsp is restricted.
|
| - #
|
| - # That is (normal instruction):
|
| - # mov %eax,%esp
|
| - # mov %esi,%edi <- Error: %esp is restricted
|
| - # vs
|
| - # mov %esi,%edi
|
| - # add %r15,%rsp <- Error: %esp is *not* restricted
|
| - # vs
|
| - # mov %eax,%esp
|
| - # add %r15,%rsp <- Ok: %rsp is restricted as it should be
|
| - #
|
| - # Check this precondition and mark the beginning of the instruction as
|
| - # invalid jump for target.
|
| - @{ if (restricted_register == REG_RSP)
|
| - instruction_info_collected |= RESTRICTED_REGISTER_USED;
|
| - else
|
| - instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;
|
| - restricted_register = NO_REG;
|
| - UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
|
| - };
|
| -
|
| - # naclcall or nacljmp. These are three-instruction indirection-jump sequences.
|
| - # and $~0x1f, %eXX
|
| - # and RBASE, %rXX
|
| - # jmpq *%rXX (or: callq *%rXX)
|
| - # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not
|
| - # just as part of the naclcall/nacljmp, but also as a standolene instruction).
|
| - #
|
| - # This means that when naclcall_or_nacljmp ragel machine will be combined with
|
| - # "normal_instruction*" regular action process_1_operand_zero_extends will be
|
| - # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64
|
| - # instruction. This action will check if %rbp/%rsp is legally modified thus
|
| - # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine.
|
| - #
|
| - # There are number of variants present which differ by the REX prefix usage:
|
| - # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%eXX" in "jmpq"
|
| - # or "callq" is the same register and it's much simpler to do if one single
|
| - # action handles only fixed number of bytes.
|
| - #
|
| - # Additional complication arises because x86-64 contains two different "add"
|
| - # instruction: with "0x01" and "0x03" opcode. They differ in the direction
|
| - # used: both can encode "add %src_register, %dst_register", but the first one
|
| - # uses field REG of the ModR/M byte for the src and field RM of the ModR/M
|
| - # byte for the dst while last one uses field RM of the ModR/M byte for the src
|
| - # and field REG of the ModR/M byte for dst. Both should be allowed.
|
| - #
|
| - # See AMD/Intel manual for clarification "add" instruction encoding.
|
| - #
|
| - # REGISTER USAGE ABBREVIATIONS:
|
| - # E86: legacy ia32 registers (all eight: %eax to %edi)
|
| - # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi)
|
| - # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d)
|
| - # R64: new amd64 registers (only seven: %r8 to %r14)
|
| - # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64)
|
| - naclcall_or_nacljmp =
|
| - # This block encodes call and jump "superinstruction" of the following form:
|
| - # 0: 83 e_ e0 and $~0x1f,E86
|
| - # 3: 4_ 01 f_ add RBASE,R86
|
| - # 6: ff e_ jmpq *R86
|
| - #### INSTRUCTION ONE (three bytes)
|
| - # and $~0x1f, E86
|
| - (0x83 b_11_100_xxx 0xe0
|
| - #### INSTRUCTION TWO (three bytes)
|
| - # add RBASE, R86 (0x01 opcode)
|
| - b_0100_11x0 0x01 b_11_111_xxx
|
| - #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
|
| - # callq R86
|
| - ((REX_WRX? 0xff b_11_010_xxx) |
|
| - #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
|
| - # jmpq R86
|
| - (REX_WRX? 0xff b_11_100_xxx)))
|
| - @{
|
| - ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected,
|
| - &instruction_begin, current_position,
|
| - data, valid_targets);
|
| - } |
|
| -
|
| - # This block encodes call and jump "superinstruction" of the following form:
|
| - # 0: 83 e_ e0 and $~0x1f,E86
|
| - # 3: 4_ 03 _f add RBASE,R86
|
| - # 6: ff e_ jmpq *R86
|
| - #### INSTRUCTION ONE (three bytes)
|
| - # and $~0x1f, E86
|
| - (0x83 b_11_100_xxx 0xe0
|
| - #### INSTRUCTION TWO (three bytes)
|
| - # add RBASE, R86 (0x03 opcode)
|
| - b_0100_10x1 0x03 b_11_xxx_111
|
| - #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
|
| - # callq R86
|
| - ((REX_WRX? 0xff b_11_010_xxx) |
|
| - #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
|
| - # jmpq R86
|
| - (REX_WRX? 0xff b_11_100_xxx)))
|
| - @{
|
| - ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected,
|
| - &instruction_begin, current_position,
|
| - data, valid_targets);
|
| - } |
|
| -
|
| - # This block encodes call and jump "superinstruction" of the following form:
|
| - # 0: 4_ 83 e_ e0 and $~0x1f,E86
|
| - # 4: 4_ 01 f_ add RBASE,R86
|
| - # 7: ff e_ jmpq *R86
|
| - #### INSTRUCTION ONE (four bytes)
|
| - # and $~0x1f, E86
|
| - ((REX_RX 0x83 b_11_100_xxx 0xe0
|
| - #### INSTRUCTION TWO (three bytes)
|
| - # add RBASE, R86 (0x01 opcode)
|
| - b_0100_11x0 0x01 b_11_111_xxx
|
| - #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
|
| - # callq R86
|
| - ((REX_WRX? 0xff b_11_010_xxx) |
|
| - #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
|
| - # jmpq R86
|
| - (REX_WRX? 0xff b_11_100_xxx))) |
|
| -
|
| - # This block encodes call and jump "superinstruction" of the following form:
|
| - # 0: 4_ 83 e_ e0 and $~0x1f,E64
|
| - # 4: 4_ 01 f_ add RBASE,R64
|
| - # 7: 4_ ff e_ jmpq *R64
|
| - #### INSTRUCTION ONE (four bytes)
|
| - # and $~0x1f, E64
|
| - (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
|
| - #### INSTRUCTION TWO (three bytes)
|
| - # add RBASE, R64 (0x01 opcode)
|
| - b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111)
|
| - #### INSTRUCTION THREE: call (three bytes)
|
| - # callq R64
|
| - ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
|
| - #### INSTRUCTION THREE: jmp (three bytes)
|
| - # jmpq R64
|
| - (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
|
| - @{
|
| - ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected,
|
| - &instruction_begin, current_position,
|
| - data, valid_targets);
|
| - } |
|
| -
|
| - # This block encodes call and jump "superinstruction" of the following form:
|
| - # 0: 4_ 83 e_ e0 and $~0x1f,E86
|
| - # 4: 4_ 03 _f add RBASE,R86
|
| - # 7: ff e_ jmpq *R86
|
| - #### INSTRUCTION ONE (four bytes)
|
| - # and $~0x1f, E86
|
| - ((REX_RX 0x83 b_11_100_xxx 0xe0
|
| - #### INSTRUCTION TWO (three bytes)
|
| - # add RBASE, R86 (0x03 opcode)
|
| - b_0100_10x1 0x03 b_11_xxx_111
|
| - #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
|
| - # callq R86
|
| - ((REX_WRX? 0xff b_11_010_xxx) |
|
| - #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
|
| - # jmpq R86
|
| - (REX_WRX? 0xff b_11_100_xxx))) |
|
| -
|
| - # This block encodes call and jump "superinstruction" of the following form:
|
| - # 0: 4_ 83 e_ e0 and $~0x1f,E64
|
| - # 4: 4_ 03 _f add RBASE,R64
|
| - # 7: 4_ ff e_ jmpq *R64
|
| - #### INSTRUCTION ONE (four bytes)
|
| - # and $~0x1f, E64
|
| - (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
|
| - #### INSTRUCTION TWO (three bytes)
|
| - # add RBASE, R64 (0x03 opcode)
|
| - b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111)
|
| - #### INSTRUCTION THREE: call (three bytes)
|
| - # callq R64
|
| - ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
|
| - #### INSTRUCTION THREE: jmp (three bytes)
|
| - # jmpq R64
|
| - (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
|
| - @{
|
| - ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected,
|
| - &instruction_begin, current_position,
|
| - data, valid_targets);
|
| - };
|
| -
|
| - # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rsi) operand
|
| -
|
| - # maskmovq %mmX,%mmY (EMMX or SSE)
|
| - maskmovq = REX_WRXB? 0x0f 0xf7 @CPUFeature_EMMXSSE modrm_registers;
|
| -
|
| - # maskmovdqu %xmmX, %xmmY (SSE2)
|
| - maskmovdqu = 0x66 REX_WRXB? 0x0f 0xf7 @CPUFeature_SSE2 modrm_registers;
|
| -
|
| - # vmaskmovdqu %xmmX, %xmmY (AVX)
|
| - vmaskmovdqu = ((0xc4 (VEX_RB & VEX_map00001) b_0_1111_0_01) |
|
| - (0xc5 b_X_1111_0_01)) 0xf7 @CPUFeature_AVX modrm_registers;
|
| -
|
| - mmx_sse_rdi_instruction = maskmovq | maskmovdqu | vmaskmovdqu;
|
| -
|
| - # Temporary fix: for string instructions combination of data16 and rep(ne)
|
| - # prefixes is disallowed to mimic old validator behavior.
|
| - # See http://code.google.com/p/nativeclient/issues/detail?id=1950
|
| -
|
| - # data16rep = (data16 | rep data16 | data16 rep);
|
| - # data16condrep = (data16 | condrep data16 | data16 condrep);
|
| - data16rep = data16;
|
| - data16condrep = data16;
|
| -
|
| - # String instructions which use only %ds:(%rsi)
|
| - string_instruction_rsi_no_rdi =
|
| - (rep? 0xac | # lods %ds:(%rsi),%al
|
| - data16rep 0xad | # lods %ds:(%rsi),%ax
|
| - rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax
|
| -
|
| - # String instructions which use only %ds:(%rdi)
|
| - string_instruction_rdi_no_rsi =
|
| - condrep? 0xae | # scas %es:(%rdi),%al
|
| - data16condrep 0xaf | # scas %es:(%rdi),%ax
|
| - condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax
|
| -
|
| - rep? 0xaa | # stos %al,%es:(%rdi)
|
| - data16rep 0xab | # stos %ax,%es:(%rdi)
|
| - rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi)
|
| -
|
| - # String instructions which use both %ds:(%rsi) and %es:(%rdi)
|
| - string_instruction_rsi_rdi =
|
| - condrep? 0xa6 | # cmpsb %es:(%rdi),%ds:(%rsi)
|
| - data16condrep 0xa7 | # cmpsw %es:(%rdi),%ds:(%rsi)
|
| - condrep? REXW_NONE? 0xa7 | # cmps[lq] %es:(%rdi),%ds:(%rsi)
|
| -
|
| - rep? 0xa4 | # movsb %ds:(%rsi),%es:(%rdi)
|
| - data16rep 0xa5 | # movsw %ds:(%rsi),%es:(%rdi)
|
| - rep? REXW_NONE? 0xa5; # movs[lq] %ds:(%rsi),%es:(%rdi)
|
| -
|
| - # "Superinstruction" which includes %rsi sandboxing.
|
| - #
|
| - # There are two variants which handle spurious REX prefixes.
|
| - #
|
| - # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %edi,%edi": in x86-64
|
| - # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
|
| - # be used to move from REG field to RM or in the other direction thus there
|
| - # are two encodings for the register-to-register move (and since REG and RM
|
| - # are identical here only opcode differs).
|
| - sandbox_instruction_rsi_no_rdi =
|
| - (0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| - 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
|
| - string_instruction_rsi_no_rdi
|
| - @{
|
| - ExpandSuperinstructionBySandboxingBytes(
|
| - 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
|
| - } |
|
| -
|
| - REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| - 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
|
| - string_instruction_rsi_no_rdi
|
| - @{
|
| - ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
|
| - };
|
| -
|
| - # "Superinstruction" which includes %rdi sandboxing.
|
| - #
|
| - # There are two variants which handle spurious REX prefixes.
|
| - #
|
| - # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64
|
| - # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
|
| - # be used to move from REG field to RM or in the other direction thus there
|
| - # are two encodings for the register-to-register move (and since REG and RM
|
| - # are identical here only opcode differs).
|
| - sandbox_instruction_rdi_no_rsi =
|
| - (0x89 | 0x8b) 0xff # mov %edi,%edi
|
| - 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
|
| - (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
|
| - @{
|
| - ExpandSuperinstructionBySandboxingBytes(
|
| - 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
|
| - } |
|
| -
|
| - REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
|
| - 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
|
| - (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
|
| - @{
|
| - ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
|
| - };
|
| -
|
| -
|
| - # "Superinstruction" which includes both %rsi and %rdi sandboxing.
|
| - #
|
| - # There are four variants which handle spurious REX prefixes.
|
| - #
|
| - # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi" while both
|
| - # "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 there are two
|
| - # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
|
| - # from REG field to RM or in the other direction thus there are two encodings
|
| - # for the register-to-register move (and since REG and RM are identical here
|
| - # only opcode differs).
|
| - sandbox_instruction_rsi_rdi =
|
| - (0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| - 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
|
| - (0x89 | 0x8b) 0xff # mov %edi,%edi
|
| - 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
|
| - string_instruction_rsi_rdi
|
| - @{
|
| - ExpandSuperinstructionBySandboxingBytes(
|
| - 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */,
|
| - &instruction_begin, data, valid_targets);
|
| - } |
|
| -
|
| - (((0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| - 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
|
| - REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
|
| - 0x49 0x8d 0x3c 0x3f) | # lea (%r15,%rdi,1),%rdi
|
| -
|
| - (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
|
| - 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
|
| - (0x89 | 0x8b) 0xff # mov %edi,%edi
|
| - 0x49 0x8d 0x3c 0x3f)) # lea (%r15,%rdi,1),%rdi
|
| - string_instruction_rsi_rdi
|
| - @{
|
| - ExpandSuperinstructionBySandboxingBytes(
|
| - 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */
|
| - /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */,
|
| - &instruction_begin, data, valid_targets);
|
| - } |
|
| -
|
| - REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
|
| - 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi
|
| - REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
|
| - 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
|
| - string_instruction_rsi_rdi
|
| - @{
|
| - ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */,
|
| - &instruction_begin, data, valid_targets);
|
| - };
|
| -
|
| - # All the "special" instructions (== instructions which obey non-standard
|
| - # rules). Three groups:
|
| - # * %rsp/%rsp related instructions (these instructions are special because
|
| - # they must be in the range %r15...%r15+4294967295 except momentarily they
|
| - # can be in the range 0...4294967295)
|
| - # * string instructions (which can not use %r15 as base and thus need special
|
| - # handling both in compiler and validator)
|
| - # * naclcall/nacljmp (indirect jumps need special care)
|
| - special_instruction =
|
| - (rbp_modifications |
|
| - rsp_modifications |
|
| - rbp_sandboxing |
|
| - rsp_sandboxing |
|
| - sandbox_instruction_rsi_no_rdi |
|
| - sandbox_instruction_rdi_no_rsi |
|
| - sandbox_instruction_rsi_rdi |
|
| - naclcall_or_nacljmp)
|
| - # Mark the instruction as special - currently this information is used only
|
| - # in tests, but in the future we may use it for dynamic code modification
|
| - # support.
|
| - @{
|
| - instruction_info_collected |= SPECIAL_INSTRUCTION;
|
| - };
|
| -
|
| - # Remove special instructions which are only allowed in special cases.
|
| - normal_instruction = one_instruction - special_instruction;
|
| -
|
| - # Check if call is properly aligned.
|
| - #
|
| - # For direct call we explicitly encode all variations. For indirect call
|
| - # we accept all the special instructions which ends with register-addressed
|
| - # indirect call.
|
| - call_alignment =
|
| - ((normal_instruction &
|
| - # Direct call
|
| - ((data16 REX_RXB? 0xe8 rel16) |
|
| - (REX_WRXB? 0xe8 rel32) |
|
| - (data16 REXW_RXB 0xe8 rel32))) |
|
| - (special_instruction &
|
| - # Indirect call
|
| - (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* &
|
| - modrm_registers))))
|
| - # Call instruction must aligned to the end of bundle. Previously this was
|
| - # strict requirement, today it's just warning to aid with debugging.
|
| - @{
|
| - if (((current_position - data) & kBundleMask) != kBundleMask)
|
| - instruction_info_collected |= BAD_CALL_ALIGNMENT;
|
| - };
|
| -
|
| - # This action calls user's callback (if needed) and cleans up validator's
|
| - # internal state.
|
| - #
|
| - # We call the user callback if there are validation errors or if the
|
| - # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used.
|
| - #
|
| - # After that we move instruction_begin and clean all the variables which
|
| - # only used in the processing of a single instruction (prefixes, operand
|
| - # states and instruction_info_collected).
|
| - action end_of_instruction_cleanup {
|
| - /* Call user-supplied callback. */
|
| - instruction_end = current_position + 1;
|
| - if ((instruction_info_collected & VALIDATION_ERRORS_MASK) ||
|
| - (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) {
|
| - result &= user_callback(
|
| - instruction_begin, instruction_end,
|
| - instruction_info_collected |
|
| - ((restricted_register << RESTRICTED_REGISTER_SHIFT) &
|
| - RESTRICTED_REGISTER_MASK), callback_data);
|
| - }
|
| -
|
| - /* On successful match the instruction_begin must point to the next byte
|
| - * to be able to report the new offset as the start of instruction
|
| - * causing error. */
|
| - instruction_begin = instruction_end;
|
| -
|
| - /* Mark start of the next instruction as a valid target for jump.
|
| - * Note: we mark start of the next instruction here, not start of the
|
| - * current one because memory access check should be able to clear this
|
| - * bit when restricted register is used. */
|
| - MarkValidJumpTarget(instruction_begin - data, valid_targets);
|
| -
|
| - /* Clear variables. */
|
| - instruction_info_collected = 0;
|
| - SET_REX_PREFIX(FALSE);
|
| - /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
|
| - SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B);
|
| - SET_VEX_PREFIX3(0x00);
|
| - operand_states = 0;
|
| - base = 0;
|
| - index = 0;
|
| - }
|
| -
|
| - # This action reports fatal error detected by DFA.
|
| - action report_fatal_error {
|
| - result &= user_callback(instruction_begin, current_position,
|
| - UNRECOGNIZED_INSTRUCTION, callback_data);
|
| - /*
|
| - * Process the next bundle: "continue" here is for the "for" cycle in
|
| - * the ValidateChunkAMD64 function.
|
| - *
|
| - * It does not affect the case which we really care about (when code
|
| - * is validatable), but makes it possible to detect more errors in one
|
| - * run in tools like ncval.
|
| - */
|
| - continue;
|
| - }
|
| -
|
| - # This is main ragel machine: it does 99% of validation work. There are only
|
| - # one thing to do with bundle if this machine accepts the bundle:
|
| - # * check for the state of the restricted_register at the end of the bundle.
|
| - # It's an error is %rbp or %rsp is restricted at the end of the bundle.
|
| - # Additionally if all the bundles are fine you need to check that direct jumps
|
| - # are corect. Thiis is done in the following way:
|
| - # * DFA fills two arrays: valid_targets and jump_dests.
|
| - # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0".
|
| - # All other checks are done here.
|
| -
|
| - main := ((call_alignment | normal_instruction | special_instruction)
|
| - @end_of_instruction_cleanup)*
|
| - $!report_fatal_error;
|
| -
|
| -}%%
|
| -
|
| -%% write data;
|
| -
|
| -enum OperandKind {
|
| - OPERAND_SANDBOX_IRRELEVANT = 0,
|
| - /*
|
| - * Currently we do not distinguish 8bit and 16bit modifications from
|
| - * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator.
|
| - *
|
| - * 8bit operands must be distinguished from other types because the REX prefix
|
| - * regulates the choice between %ah and %spl, as well as %ch and %bpl.
|
| - */
|
| - OPERAND_SANDBOX_8BIT,
|
| - OPERAND_SANDBOX_RESTRICTED,
|
| - OPERAND_SANDBOX_UNRESTRICTED
|
| -};
|
| -
|
| -#define SET_OPERAND_NAME(N, S) operand_states |= ((S) << ((N) << 3))
|
| -#define SET_OPERAND_TYPE(N, T) SET_OPERAND_TYPE_ ## T(N)
|
| -#define SET_OPERAND_TYPE_OPERAND_TYPE_8_BIT(N) \
|
| - operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((N) << 3))
|
| -#define SET_OPERAND_TYPE_OPERAND_TYPE_16_BIT(N) \
|
| - operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3))
|
| -#define SET_OPERAND_TYPE_OPERAND_TYPE_32_BIT(N) \
|
| - operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3))
|
| -#define SET_OPERAND_TYPE_OPERAND_TYPE_64_BIT(N) \
|
| - operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3))
|
| -#define CHECK_OPERAND(N, S, T) \
|
| - ((operand_states & (0xff << ((N) << 3))) == ((S | (T << 5)) << ((N) << 3)))
|
| -
|
| -static INLINE void CheckAccess(ptrdiff_t instruction_begin,
|
| - enum OperandName base,
|
| - enum OperandName index,
|
| - uint8_t restricted_register,
|
| - bitmap_word *valid_targets,
|
| - uint32_t *instruction_info_collected) {
|
| - if ((base == REG_RIP) || (base == REG_R15) ||
|
| - (base == REG_RSP) || (base == REG_RBP)) {
|
| - if ((index == NO_REG) || (index == REG_RIZ))
|
| - { /* do nothing. */ }
|
| - else if (index == restricted_register)
|
| - BitmapClearBit(valid_targets, instruction_begin),
|
| - *instruction_info_collected |= RESTRICTED_REGISTER_USED;
|
| - else
|
| - *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER;
|
| - } else {
|
| - *instruction_info_collected |= FORBIDDEN_BASE_REGISTER;
|
| - }
|
| -}
|
| -
|
| -
|
| -static INLINE void Process0Operands(enum OperandName *restricted_register,
|
| - uint32_t *instruction_info_collected) {
|
| - /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
|
| - * instruction, not with regular instruction. */
|
| - if (*restricted_register == REG_RSP) {
|
| - *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
|
| - } else if (*restricted_register == REG_RBP) {
|
| - *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
|
| - }
|
| - *restricted_register = NO_REG;
|
| -}
|
| -
|
| -static INLINE void Process1Operand(enum OperandName *restricted_register,
|
| - uint32_t *instruction_info_collected,
|
| - uint8_t rex_prefix,
|
| - uint32_t operand_states) {
|
| - /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
|
| - * instruction, not with regular instruction. */
|
| - if (*restricted_register == REG_RSP) {
|
| - *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
|
| - } else if (*restricted_register == REG_RBP) {
|
| - *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
|
| - }
|
| - *restricted_register = NO_REG;
|
| - if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
|
| - CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
|
| - CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
|
| - *instruction_info_collected |= R15_MODIFIED;
|
| - } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
|
| - CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
|
| - CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
|
| - *instruction_info_collected |= BPL_MODIFIED;
|
| - } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
|
| - CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
|
| - CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
|
| - *instruction_info_collected |= SPL_MODIFIED;
|
| - }
|
| -}
|
| -
|
| -static INLINE void Process1OperandZeroExtends(
|
| - enum OperandName *restricted_register,
|
| - uint32_t *instruction_info_collected,
|
| - uint8_t rex_prefix,
|
| - uint32_t operand_states) {
|
| - /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
|
| - * instruction, not with regular instruction. */
|
| - if (*restricted_register == REG_RSP) {
|
| - *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
|
| - } else if (*restricted_register == REG_RBP) {
|
| - *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
|
| - }
|
| - *restricted_register = NO_REG;
|
| - if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
|
| - CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
|
| - CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
|
| - *instruction_info_collected |= R15_MODIFIED;
|
| - } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
|
| - CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
|
| - *instruction_info_collected |= BPL_MODIFIED;
|
| - } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
|
| - CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
|
| - *instruction_info_collected |= SPL_MODIFIED;
|
| - /* Take 2 bits of operand type from operand_states as *restricted_register,
|
| - * make sure operand_states denotes a register (4th bit == 0). */
|
| - } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) {
|
| - *restricted_register = operand_states & 0x0f;
|
| - }
|
| -}
|
| -
|
| -static INLINE void Process2Operands(enum OperandName *restricted_register,
|
| - uint32_t *instruction_info_collected,
|
| - uint8_t rex_prefix,
|
| - uint32_t operand_states) {
|
| - /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
|
| - * instruction, not with regular instruction. */
|
| - if (*restricted_register == REG_RSP) {
|
| - *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
|
| - } else if (*restricted_register == REG_RBP) {
|
| - *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
|
| - }
|
| - *restricted_register = NO_REG;
|
| - if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
|
| - CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
|
| - CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
|
| - CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
|
| - CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
|
| - CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
|
| - *instruction_info_collected |= R15_MODIFIED;
|
| - } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
|
| - CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
|
| - CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) ||
|
| - (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
|
| - CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
|
| - CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
|
| - *instruction_info_collected |= BPL_MODIFIED;
|
| - } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
|
| - CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
|
| - CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) ||
|
| - (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
|
| - CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
|
| - CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
|
| - *instruction_info_collected |= SPL_MODIFIED;
|
| - }
|
| -}
|
| -
|
| -static INLINE void Process2OperandsZeroExtends(
|
| - enum OperandName *restricted_register,
|
| - uint32_t *instruction_info_collected,
|
| - uint8_t rex_prefix,
|
| - uint32_t operand_states) {
|
| - /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
|
| - * instruction, not with regular instruction. */
|
| - if (*restricted_register == REG_RSP) {
|
| - *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
|
| - } else if (*restricted_register == REG_RBP) {
|
| - *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
|
| - }
|
| - *restricted_register = NO_REG;
|
| - if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
|
| - CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
|
| - CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
|
| - CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
|
| - CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
|
| - CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
|
| - *instruction_info_collected |= R15_MODIFIED;
|
| - } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
|
| - CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) ||
|
| - (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
|
| - CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
|
| - *instruction_info_collected |= BPL_MODIFIED;
|
| - } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
|
| - CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) ||
|
| - (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
|
| - CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
|
| - *instruction_info_collected |= SPL_MODIFIED;
|
| - /* Take 2 bits of operand type from operand_states as *restricted_register,
|
| - * make sure operand_states denotes a register (4th bit == 0). */
|
| - } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) {
|
| - *restricted_register = operand_states & 0x0f;
|
| - if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED)) {
|
| - *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
|
| - } else if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED)) {
|
| - *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
|
| - }
|
| - /* Take 2 bits of operand type from operand_states as *restricted_register,
|
| - * make sure operand_states denotes a register (12th bit == 0). */
|
| - } else if ((operand_states & 0x7000) == (OPERAND_SANDBOX_RESTRICTED << 13)) {
|
| - *restricted_register = (operand_states & 0x0f00) >> 8;
|
| - }
|
| -}
|
| -
|
| -/*
|
| - * This function merges "dangerous" instruction with sandboxing instructions to
|
| - * get a "superinstruction" and unmarks in-between jump targets.
|
| - */
|
| -static INLINE void ExpandSuperinstructionBySandboxingBytes(
|
| - size_t sandbox_instructions_size,
|
| - const uint8_t **instruction_begin,
|
| - const uint8_t *data,
|
| - bitmap_word *valid_targets) {
|
| - *instruction_begin -= sandbox_instructions_size;
|
| - /*
|
| - * We need to unmark start of the "dangerous" instruction itself, too, but we
|
| - * don't need to mark the beginning of the whole "superinstruction" - that's
|
| - * why we move start by one byte and don't change the length.
|
| - */
|
| - UnmarkValidJumpTargets((*instruction_begin + 1 - data),
|
| - sandbox_instructions_size,
|
| - valid_targets);
|
| -}
|
| -
|
| -/*
|
| - * Return TRUE if naclcall or nacljmp uses the same register in all three
|
| - * instructions.
|
| - *
|
| - * This version is for the case where "add %src_register, %dst_register" with
|
| - * dst in RM field and src in REG field of ModR/M byte is used.
|
| - *
|
| - * There are five possible forms:
|
| - *
|
| - * 0: 83 eX e0 and $~0x1f,E86
|
| - * 3: 4? 01 fX add RBASE,R86
|
| - * 6: ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 4? 83 eX e0 and $~0x1f,E86
|
| - * 4: 4? 01 fX add RBASE,R86
|
| - * 7: ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 83 eX e0 and $~0x1f,E86
|
| - * 3: 4? 01 fX add RBASE,R86
|
| - * 6: 4? ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 4? 83 eX e0 and $~0x1f,E86
|
| - * 4: 4? 01 fX add RBASE,R86
|
| - * 7: 4? ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 4? 83 eX e0 and $~0x1f,E64
|
| - * 4: 4? 01 fX add RBASE,R64
|
| - * 7: 4? ff eX jmpq *R64
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * We don't care about "?" (they are checked by DFA).
|
| - */
|
| -static INLINE Bool VerifyNaclCallOrJmpAddToRM(const uint8_t *instruction_begin,
|
| - const uint8_t *current_position) {
|
| - return
|
| - RMFromModRM(instruction_begin[-5]) == RMFromModRM(instruction_begin[-1]) &&
|
| - RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]);
|
| -}
|
| -
|
| -/*
|
| - * Return TRUE if naclcall or nacljmp uses the same register in all three
|
| - * instructions.
|
| - *
|
| - * This version is for the case where "add %src_register, %dst_register" with
|
| - * dst in REG field and src in RM field of ModR/M byte is used.
|
| - *
|
| - * There are five possible forms:
|
| - *
|
| - * 0: 83 eX e0 and $~0x1f,E86
|
| - * 3: 4? 03 Xf add RBASE,R86
|
| - * 6: ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 4? 83 eX e0 and $~0x1f,E86
|
| - * 4: 4? 03 Xf add RBASE,R86
|
| - * 7: ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 83 eX e0 and $~0x1f,E86
|
| - * 3: 4? 03 Xf add RBASE,R86
|
| - * 6: 4? ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 4? 83 eX e0 and $~0x1f,E86
|
| - * 4: 4? 03 Xf add RBASE,R86
|
| - * 7: 4? ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 4? 83 eX e0 and $~0x1f,E64
|
| - * 4: 4? 03 Xf add RBASE,R64
|
| - * 7: 4? ff eX jmpq *R64
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * We don't care about "?" (they are checked by DFA).
|
| - */
|
| -static INLINE Bool VerifyNaclCallOrJmpAddToReg(
|
| - const uint8_t *instruction_begin,
|
| - const uint8_t *current_position) {
|
| - return
|
| - RMFromModRM(instruction_begin[-5]) == RegFromModRM(instruction_begin[-1]) &&
|
| - RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]);
|
| -}
|
| -
|
| -/*
|
| - * This function checks that naclcall or nacljmp are correct (that is: three
|
| - * component instructions match) and if that is true then it merges call or jmp
|
| - * with a sandboxing to get a "superinstruction" and removes in-between jump
|
| - * targets. If it's not true then it triggers "unrecognized instruction" error
|
| - * condition.
|
| - *
|
| - * This version is for the case where "add with dst register in RM field"
|
| - * (opcode 0x01) and "add without REX prefix" is used.
|
| - *
|
| - * There are two possibile forms:
|
| - *
|
| - * 0: 83 eX e0 and $~0x1f,E86
|
| - * 3: 4? 01 fX add RBASE,R86
|
| - * 6: ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 83 eX e0 and $~0x1f,E86
|
| - * 3: 4? 01 fX add RBASE,R86
|
| - * 6: 4? ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - */
|
| -static INLINE void ProcessNaclCallOrJmpAddToRMNoRex(
|
| - uint32_t *instruction_info_collected,
|
| - const uint8_t **instruction_begin,
|
| - const uint8_t *current_position,
|
| - const uint8_t *data,
|
| - bitmap_word *valid_targets) {
|
| - if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
|
| - ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
|
| - else
|
| - *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| -}
|
| -
|
| -/*
|
| - * This function checks that naclcall or nacljmp are correct (that is: three
|
| - * component instructions match) and if that is true then it merges call or jmp
|
| - * with a sandboxing to get a "superinstruction" and removes in-between jump
|
| - * targets. If it's not true then it triggers "unrecognized instruction" error
|
| - * condition.
|
| - *
|
| - * This version is for the case where "add with dst register in REG field"
|
| - * (opcode 0x03) and "add without REX prefix" is used.
|
| - *
|
| - * There are two possibile forms:
|
| - *
|
| - * 0: 83 eX e0 and $~0x1f,E86
|
| - * 3: 4? 03 Xf add RBASE,R86
|
| - * 6: ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 83 eX e0 and $~0x1f,E86
|
| - * 3: 4? 03 Xf add RBASE,R86
|
| - * 6: 4? ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - */
|
| -static INLINE void ProcessNaclCallOrJmpAddToRegNoRex(
|
| - uint32_t *instruction_info_collected,
|
| - const uint8_t **instruction_begin,
|
| - const uint8_t *current_position,
|
| - const uint8_t *data,
|
| - bitmap_word *valid_targets) {
|
| - if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
|
| - ExpandSuperinstructionBySandboxingBytes(
|
| - 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
|
| - else
|
| - *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| -}
|
| -
|
| -/*
|
| - * This function checks that naclcall or nacljmp are correct (that is: three
|
| - * component instructions match) and if that is true then it merges call or jmp
|
| - * with a sandboxing to get a "superinstruction" and removes in-between jump
|
| - * targets. If it's not true then it triggers "unrecognized instruction" error
|
| - * condition.
|
| - *
|
| - * This version is for the case where "add with dst register in RM field"
|
| - * (opcode 0x01) and "add without REX prefix" is used.
|
| - *
|
| - * There are three possibile forms:
|
| - *
|
| - * 0: 4? 83 eX e0 and $~0x1f,E86
|
| - * 4: 4? 01 fX add RBASE,R86
|
| - * 7: ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 4? 83 eX e0 and $~0x1f,E86
|
| - * 4: 4? 01 fX add RBASE,R86
|
| - * 7: 4? ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 4? 83 eX e0 and $~0x1f,E64
|
| - * 4: 4? 01 fX add RBASE,R64
|
| - * 7: 4? ff eX jmpq *R64
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - */
|
| -static INLINE void ProcessNaclCallOrJmpAddToRMWithRex(
|
| - uint32_t *instruction_info_collected,
|
| - const uint8_t **instruction_begin,
|
| - const uint8_t *current_position,
|
| - const uint8_t *data,
|
| - bitmap_word *valid_targets) {
|
| - if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
|
| - ExpandSuperinstructionBySandboxingBytes(
|
| - 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
|
| - else
|
| - *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| -}
|
| -
|
| -/*
|
| - * This function checks that naclcall or nacljmp are correct (that is: three
|
| - * component instructions match) and if that is true then it merges call or jmp
|
| - * with a sandboxing to get a "superinstruction" and removes in-between jump
|
| - * targets. If it's not true then it triggers "unrecognized instruction" error
|
| - * condition.
|
| - *
|
| - * This version is for the case where "add with dst register in REG field"
|
| - * (opcode 0x03) and "add without REX prefix" is used.
|
| - *
|
| - * There are three possibile forms:
|
| - *
|
| - * 0: 4? 83 eX e0 and $~0x1f,E86
|
| - * 4: 4? 03 Xf add RBASE,R86
|
| - * 7: ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 4? 83 eX e0 and $~0x1f,E86
|
| - * 4: 4? 03 Xf add RBASE,R86
|
| - * 7: 4? ff eX jmpq *R86
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - *
|
| - * 0: 4? 83 eX e0 and $~0x1f,E64
|
| - * 4: 4? 03 Xf add RBASE,R64
|
| - * 7: 4? ff eX jmpq *R64
|
| - * ^ ^
|
| - * instruction_begin current_position
|
| - */
|
| -static INLINE void ProcessNaclCallOrJmpAddToRegWithRex(
|
| - uint32_t *instruction_info_collected,
|
| - const uint8_t **instruction_begin,
|
| - const uint8_t *current_position,
|
| - const uint8_t *data,
|
| - bitmap_word *valid_targets) {
|
| - if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
|
| - ExpandSuperinstructionBySandboxingBytes(
|
| - 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
|
| - else
|
| - *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
|
| -}
|
| -
|
| -
|
| -Bool ValidateChunkAMD64(const uint8_t *data, size_t size,
|
| - uint32_t options,
|
| - const NaClCPUFeaturesX86 *cpu_features,
|
| - ValidationCallbackFunc user_callback,
|
| - void *callback_data) {
|
| - bitmap_word valid_targets_small;
|
| - bitmap_word jump_dests_small;
|
| - bitmap_word *valid_targets;
|
| - bitmap_word *jump_dests;
|
| - const uint8_t *current_position;
|
| - const uint8_t *end_of_bundle;
|
| - int result = TRUE;
|
| -
|
| - CHECK(sizeof valid_targets_small == sizeof jump_dests_small);
|
| - CHECK(size % kBundleSize == 0);
|
| -
|
| - /*
|
| - * For a very small sequences (one bundle) malloc is too expensive.
|
| - *
|
| - * Note1: we allocate one extra bit, because we set valid jump target bits
|
| - * _after_ instructions, so there will be one at the end of the chunk.
|
| - *
|
| - * Note2: we don't ever mark first bit as a valid jump target but this is
|
| - * not a problem because any aligned address is valid jump target.
|
| - */
|
| - if ((size + 1) <= (sizeof valid_targets_small * 8)) {
|
| - valid_targets_small = 0;
|
| - valid_targets = &valid_targets_small;
|
| - jump_dests_small = 0;
|
| - jump_dests = &jump_dests_small;
|
| - } else {
|
| - valid_targets = BitmapAllocate(size + 1);
|
| - jump_dests = BitmapAllocate(size + 1);
|
| - if (!valid_targets || !jump_dests) {
|
| - free(jump_dests);
|
| - free(valid_targets);
|
| - errno = ENOMEM;
|
| - return FALSE;
|
| - }
|
| - }
|
| -
|
| - /*
|
| - * This option is usually used in tests: we will process the whole chunk
|
| - * in one pass. Usually each bundle is processed separately which means
|
| - * instructions (and super-instructions) can not cross borders of the bundle.
|
| - */
|
| - if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)
|
| - end_of_bundle = data + size;
|
| - else
|
| - end_of_bundle = data + kBundleSize;
|
| -
|
| - /*
|
| - * Main loop. Here we process the data array bundle-after-bundle.
|
| - * Ragel-produced DFA does all the checks with one exception: direct jumps.
|
| - * It collects the two arrays: valid_targets and jump_dests which are used
|
| - * to test direct jumps later.
|
| - */
|
| - for (current_position = data;
|
| - current_position < data + size;
|
| - current_position = end_of_bundle,
|
| - end_of_bundle = current_position + kBundleSize) {
|
| - /* Start of the instruction being processed. */
|
| - const uint8_t *instruction_begin = current_position;
|
| - /* Only used locally in the end_of_instruction_cleanup action. */
|
| - const uint8_t *instruction_end;
|
| - int current_state;
|
| - uint32_t instruction_info_collected = 0;
|
| - /* Keeps one byte of information per operand in the current instruction:
|
| - * 2 bits for register kinds,
|
| - * 5 bits for register numbers (16 regs plus RIZ). */
|
| - uint32_t operand_states = 0;
|
| - enum OperandName base = NO_REG;
|
| - enum OperandName index = NO_REG;
|
| - enum OperandName restricted_register =
|
| - EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options);
|
| - uint8_t rex_prefix = FALSE;
|
| - /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
|
| - uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B;
|
| - uint8_t vex_prefix3 = 0x00;
|
| -
|
| - %% write init;
|
| - %% write exec;
|
| -
|
| - /*
|
| - * Ragel DFA accepted the bundle, but we still need to make sure the last
|
| - * instruction haven't left %rbp or %rsp in restricted state.
|
| - */
|
| - if (restricted_register == REG_RBP)
|
| - result &= user_callback(end_of_bundle, end_of_bundle,
|
| - RESTRICTED_RBP_UNPROCESSED |
|
| - ((REG_RBP << RESTRICTED_REGISTER_SHIFT) &
|
| - RESTRICTED_REGISTER_MASK), callback_data);
|
| - else if (restricted_register == REG_RSP)
|
| - result &= user_callback(end_of_bundle, end_of_bundle,
|
| - RESTRICTED_RSP_UNPROCESSED |
|
| - ((REG_RSP << RESTRICTED_REGISTER_SHIFT) &
|
| - RESTRICTED_REGISTER_MASK), callback_data);
|
| - }
|
| -
|
| - /*
|
| - * Check the direct jumps. All the targets from jump_dests must be in
|
| - * valid_targets.
|
| - */
|
| - result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests,
|
| - user_callback, callback_data);
|
| -
|
| - /* We only use malloc for a large code sequences */
|
| - if (jump_dests != &jump_dests_small) free(jump_dests);
|
| - if (valid_targets != &valid_targets_small) free(valid_targets);
|
| - if (!result) errno = EINVAL;
|
| - return result;
|
| -}
|
|
|