src/trusted/validator_ragel/validator_x86_64.rl - Issue 11000033: Move validator_x86_XX.rl out of unreviewed.

Unified Diff: src/trusted/validator_ragel/validator_x86_64.rl

Issue 11000033: Move validator_x86_XX.rl out of unreviewed. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/

Patch Set: Created 8 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/trusted/validator_ragel/validator_x86_64.rl

===================================================================

--- src/trusted/validator_ragel/validator_x86_64.rl (revision 9944)

+++ src/trusted/validator_ragel/validator_x86_64.rl (working copy)

@@ -4,6 +4,14 @@

* found in the LICENSE file.

+/*

+ * This is the core of amd64-mode validator. Please note that this file

+ * combines ragel machine description and C language actions. Please read

+ * validator_internals.html first to understand how the whole thing is built:

+ * it explains how the byte sequences are constructed, what constructs like

+ * “@{}” or “REX_WRX?” mean, etc.

+ */

#include <assert.h>

#include <errno.h>

#include <stddef.h>

@@ -11,7 +19,7 @@

#include <stdlib.h>

#include <string.h>

-#include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h"

+#include "native_client/src/trusted/validator_ragel/validator_internal.h"

%%{

machine x86_64_validator;

@@ -49,13 +57,20 @@

"native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";

include immediate_fields_parsing_amd64

"native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";

+ # rel8 actions are used in relative jumps with 8-bit offset.

action rel8_operand {

rel8_operand(current_position + 1, data, jump_dests, size,

&instruction_info_collected);

}

+ # rel16 actions are used in relative jumps with 16-bit offset.

+ #

+ # Such instructions should not be included in the validator's DFA, but we can

+ # not just exclude them because they are referenced in the relative_fields_parsing

+ # ragel machine. Ensure a compilation error in case of usage.

action rel16_operand {

#error rel16_operand should never be used in nacl

}

+ # rel32 actions are used in relative calls and jumps with 32-bit offset.

action rel32_operand {

rel32_operand(current_position + 1, data, jump_dests, size,

&instruction_info_collected);

@@ -79,25 +94,25 @@

}

action process_0_operands {

- process_0_operands(&restricted_register, &instruction_info_collected);

+ Process0Operands(&restricted_register, &instruction_info_collected);

}

action process_1_operand {

- process_1_operand(&restricted_register, &instruction_info_collected,

- rex_prefix, operand_states);

+ Process1Operand(&restricted_register, &instruction_info_collected,

+ rex_prefix, operand_states);

}

action process_1_operand_zero_extends {

- process_1_operand_zero_extends(&restricted_register,

- &instruction_info_collected, rex_prefix,

- operand_states);

+ Process1OperandZeroExtends(&restricted_register,

+ &instruction_info_collected, rex_prefix,

+ operand_states);

}

action process_2_operands {

- process_2_operands(&restricted_register, &instruction_info_collected,

- rex_prefix, operand_states);

+ Process2Operands(&restricted_register, &instruction_info_collected,

+ rex_prefix, operand_states);

}

action process_2_operands_zero_extends {

- process_2_operands_zero_extends(&restricted_register,

- &instruction_info_collected, rex_prefix,

- operand_states);

+ Process2OperandsZeroExtends(&restricted_register,

+ &instruction_info_collected, rex_prefix,

+ operand_states);

}

include decode_x86_64 "validator_x86_64_instruction.rl";

@@ -105,29 +120,46 @@

data16condrep = (data16 | condrep data16 | data16 condrep);

data16rep = (data16 | rep data16 | data16 rep);

- # Special %rbp modifications without required sandboxing

+ # Special %rbp modifications—the ones which don't need sandboxing.

+ #

+ # Note that there are two different opcodes for “mov”: “mov” with opcode

+ # “0x89” moves from “A” to “B” while “mov” with opcode “0x8b” moves from

+ # “B” to “A”.

rbp_modifications =

(b_0100_10x0 0x89 0xe5) | # mov %rsp,%rbp

- (b_0100_10x0 0x8b 0xec) # | mov %rsp,%rbp

- #(b_0100_1xx0 0x81 0xe5 any{3} (0x80 .. 0xff)) | # and $XXX,%rbp

- #(b_0100_1xx0 0x83 0xe5 (0x80 .. 0xff)) # and $XXX,%rbp

+ (b_0100_10x0 0x8b 0xec) # mov %rsp,%rbp

@process_0_operands;

- # Special instructions used for %rbp sandboxing

+ # Special instructions used for %rbp sandboxing.

+ #

+ # This is the “second half” of the %rbp sandboxing. Any zero-extending

+ # instruction which stores the data in %ebp can be first part, but unlike

+ # the situation with other “normal” registers you can not just write to

+ # %ebp and continue: such activity MUST restore the status quo immediately

+ # via one of these instructions.

rbp_sandboxing =

- (b_0100_11x0 0x01 0xfd | # add %r15,%rbp

- b_0100_10x1 0x03 0xef | # add %r15,%rbp

- 0x49 0x8d 0x2c 0x2f | # lea (%r15,%rbp,1),%rbp

- 0x4a 0x8d 0x6c 0x3d 0x00) # lea 0x0(%rbp,%r15,1),%rbp

+ (b_0100_11x0 0x01 0xfd | # add %r15,%rbp

+ b_0100_10x1 0x03 0xef | # add %r15,%rbp

+ 0x49 0x8d 0x2c 0x2f | # lea (%r15,%rbp,1),%rbp

+ 0x4a 0x8d 0x6c 0x3d 0x00) # lea 0x0(%rbp,%r15,1),%rbp

+ # “Normal” instructions detect an error when confronted with restricted

+ # register %rbp. These instructions require this state instead.

+ #

+ # Check this precondition and mark the beginning of the instruction as

+ # an invalid jump target.

@{ if (restricted_register == REG_RBP)

instruction_info_collected |= RESTRICTED_REGISTER_USED;

else

instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;

restricted_register = NO_REG;

- BitmapClearBit(valid_targets, (instruction_start - data));

+ MakeInvalidJumpTarget((instruction_start - data), valid_targets);

};

- # Special %rbp modifications without required sandboxing

+ # Special %rsp modifications—the ones which don't need sandboxing.

+ #

+ # Note that there are two different opcodes for “mov”: “mov” with opcode

+ # “0x89” moves from “A” to “B” while “mov” with opcode “0x8b” moves from

+ # “B” to “A”.

rsp_modifications =

(b_0100_10x0 0x89 0xec) | # mov %rbp,%rsp

(b_0100_10x0 0x8b 0xe5) | # mov %rbp,%rsp

@@ -137,115 +169,177 @@

(b_0100_1000 0x83 0xe4 (0x80 .. 0xff)) # and $XXX,%rsp

@process_0_operands;

- # Special instructions used for %rbp sandboxing

+ # Special instructions used for %rsp sandboxing.

+ #

+ # This is the “second half” of the %rsp sandboxing. Any zero-extending

+ # instruction which stores the data in %esp can be first part, but unlike

+ # the situation with other “normal” registers you can not just write to

+ # %esp and continue: such activity MUST restore the status quo immediately

+ # via one of these instructions.

rsp_sandboxing =

- (b_0100_11x0 0x01 0xfc | # add %r15,%rsp

- b_0100_10x1 0x03 0xe7 | # add %r15,%rbp

- 0x4a 0x8d 0x24 0x3c) # lea (%rsp,%r15,1),%rsp

+ (b_0100_11x0 0x01 0xfc | # add %r15,%rsp

+ b_0100_10x1 0x03 0xe7 | # add %r15,%rsp

+ 0x4a 0x8d 0x24 0x3c) # lea (%rsp,%r15,1),%rsp

+ # “Normal” instructions detect an error when confronted with restricted

+ # register %rsp. These instructions require this state instead.

+ #

+ # Check this precondition and mark the beginning of the instruction as

+ # an invalid jump target.

@{ if (restricted_register == REG_RSP)

instruction_info_collected |= RESTRICTED_REGISTER_USED;

else

instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;

restricted_register = NO_REG;

- BitmapClearBit(valid_targets, (instruction_start - data));

+ MakeInvalidJumpTarget((instruction_start - data), valid_targets);

};

- # naclcall or nacljmp. Note: first "and $~0x1f, %eXX" is a normal instruction

- # and as such will detect case where %rbp/%rsp is illegally modified.

+ # naclcall or nacljmp. These are three-instruction indirect-jump sequences.

+ # and $~0x1f, %eXX

+ # and RBASE, %rXX

+ # jmpq *%rXX (or: callq *%rXX)

+ # Note: first "and $~0x1f, %eXX" is a normal instruction and as such will

+ # detect case where %rbp/%rsp is illegally modified when this machine will be

+ # combined with normal_instruction machine.

+ #

+ # There are a number of variants present which differ by the REX prefix usage:

+ # we need to make sure “%eXX” in “and”, “%rXX” in “add”, and “%rXX” in “jmpq”

+ # or “callq” are the same register and it's much simpler to do if one single

+ # action handles only a fixed number of bytes.

+ #

+ # Additional complication arises because x86-64 contains two different “add”

+ # instructions: with “0x01” and “0x03” opcode. They differ in the direction

+ # used: both can add “A” and “B” but one of them stores the result in “A” and

+ # other stores the result in “B” (see AMD/Intel manual for clarification).

+ # Both should be allowed.

+ #

+ # REGISTER USAGE ABBREVIATIONS:

+ # E86: legacy ia32 registers (all eight: %eax to %edi)

+ # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi)

+ # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d)

+ # R64: new amd64 registers (only seven: %r8 to %r14)

+ # RBASE: %r15 (used as “base of untrusted world” in NaCl for amd64)

naclcall_or_nacljmp =

- # and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi

- (0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0

- # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi

- b_0100_11x0 0x01 (0xf8|0xf9|0xfa|0xfb|0xfc|0xfd|0xfe|0xff)

- # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi

- ((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) |

- # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi

- (REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7))))

- @{

- instruction_start -= 6;

- if (RMFromModRM(instruction_start[1]) !=

- RMFromModRM(instruction_start[5]) ||

- RMFromModRM(instruction_start[1]) != RMFromModRM(*current_position))

- instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;

- BitmapClearBit(valid_targets, (instruction_start - data) + 3);

- BitmapClearBit(valid_targets, (instruction_start - data) + 6);

- restricted_register = NO_REG;

- } |

+ # This block encodes call and jump superinstructions of the form:

+ # 0: 83 e_ e0 and $~0x1f,E86

+ # 3: 4_ 01 f_ add RBASE,R86

+ # 6: ff e_ jmpq *R86

+ #### INSTRUCTION ONE (three bytes)

+ # and $~0x1f, E86

+ (0x83 b_11_100_xxx 0xe0

+ #### INSTRUCTION TWO (three bytes)

+ # add RBASE, R86 (0x01 opcode)

+ b_0100_11x0 0x01 b_11_111_xxx

+ #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)

+ # callq R86

+ ((REX_WRX? 0xff b_11_010_xxx) |

+ #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)

+ # jmpq R86

+ (REX_WRX? 0xff b_11_100_xxx)))

+ @{

+ ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected,

+ &instruction_start, current_position,

+ data, valid_targets);

+ } |

- # and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi

- (0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0

- # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi

- b_0100_10x1 0x03 (0xc7|0xcf|0xd7|0xdf|0xe7|0xef|0xf7|0xff)

- # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi

- ((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) |

- # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi

- (REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7))))

- @{

- instruction_start -= 6;

- if (RMFromModRM(instruction_start[1]) !=

- RegFromModRM(instruction_start[5]) ||

- RMFromModRM(instruction_start[1]) != RMFromModRM(*current_position))

- instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;

- BitmapClearBit(valid_targets, (instruction_start - data) + 3);

- BitmapClearBit(valid_targets, (instruction_start - data) + 6);

- restricted_register = NO_REG;

- } |

+ # This block encodes call and jump superinstructions of the form:

+ # 0: 83 e_ e0 and $~0x1f,E86

+ # 3: 4_ 03 f_ add RBASE,R86

+ # 6: ff e_ jmpq *R86

+ #### INSTRUCTION ONE (three bytes)

+ # and $~0x1f, E86

+ (0x83 b_11_100_xxx 0xe0

+ #### INSTRUCTION TWO (three bytes)

+ # add RBASE, R86 (0x03 opcode)

+ b_0100_10x1 0x03 b_11_xxx_111

+ #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)

+ # callq R86

+ ((REX_WRX? 0xff b_11_010_xxx) |

+ #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)

+ # jmpq R86

+ (REX_WRX? 0xff b_11_100_xxx)))

+ @{

+ ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected,

+ &instruction_start, current_position,

+ data, valid_targets);

+ } |

- # rex.R?X? and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi

- ((REX_RX 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0

- # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi

- b_0100_11x0 0x01 (0xf8|0xf9|0xfa|0xfb|0xfc|0xfd|0xfe|0xff)

- # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi

- ((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) |

- # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi

- (REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7)))) |

+ # This block encodes call and jump superinstructions of the form:

+ # 0: 4_ 83 e_ e0 and $~0x1f,E86

+ # 4: 4_ 01 f_ add RBASE,R86

+ # 7: ff e_ jmpq *R86

+ #### INSTRUCTION ONE (four bytes)

+ # and $~0x1f, E86

+ ((REX_RX 0x83 b_11_100_xxx 0xe0

+ #### INSTRUCTION TWO (three bytes)

+ # add RBASE, R86 (0x01 opcode)

+ b_0100_11x0 0x01 b_11_111_xxx

+ #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)

+ # callq R86

+ ((REX_WRX? 0xff b_11_010_xxx) |

+ #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)

+ # jmpq R86

+ (REX_WRX? 0xff b_11_100_xxx))) |

- # and $~0x1f, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d

- (b_0100_0xx1 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6) 0xe0

- # add %r15, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d

- b_0100_11x1 0x01 (0xf8|0xf9|0xfa|0xfb|0xfc|0xfd|0xfe)

- # callq %r8/%r9/%r10/%r11/%r12/%r13/%r14

- ((b_0100_xxx1 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6)) |

- # jmpq %r8/%r9/%r10/%r11/%r12/%r13/%r14

- (b_0100_xxx1 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6)))))

- @{

- instruction_start -= 7;

- if (RMFromModRM(instruction_start[2]) !=

- RMFromModRM(instruction_start[6]) ||

- RMFromModRM(instruction_start[2]) != RMFromModRM(*current_position))

- instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;

- BitmapClearBit(valid_targets, (instruction_start - data) + 4);

- BitmapClearBit(valid_targets, (instruction_start - data) + 7);

- restricted_register = NO_REG;

- } |

+ # This block encodes call and jump superinstructions of the form:

+ # 0: 4_ 83 e_ e0 and $~0x1f,E64

+ # 4: 4_ 01 f_ add RBASE,R64

+ # 7: 4_ ff e_ jmpq *R64

+ #### INSTRUCTION ONE (four bytes)

+ # and $~0x1f, E64

+ (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0

+ #### INSTRUCTION TWO (three bytes)

+ # add RBASE, R64 (0x01 opcode)

+ b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111)

+ #### INSTRUCTION THREE: call (three bytes)

+ # callq R64

+ ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |

+ #### INSTRUCTION THREE: jmp (three bytes)

+ # jmpq R64

+ (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))

+ @{

+ ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected,

+ &instruction_start, current_position,

+ data, valid_targets);

+ } |

- # rex.R?X? and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi

- ((REX_RX 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0

- # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi

- b_0100_10x1 0x03 (0xc7|0xcf|0xd7|0xdf|0xe7|0xef|0xf7|0xff)

- # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi

- ((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) |

- # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi

- (REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7)))) |

+ # This block encodes call and jump superinstructions of the form:

+ # 0: 4_ 83 e_ e0 and $~0x1f,E86

+ # 4: 4_ 03 f_ add RBASE,R86

+ # 7: ff e_ jmpq *R86

+ #### INSTRUCTION ONE (four bytes)

+ # and $~0x1f, E86

+ ((REX_RX 0x83 b_11_100_xxx 0xe0

+ #### INSTRUCTION TWO (three bytes)

+ # add RBASE, R86 (0x03 opcode)

+ b_0100_10x1 0x03 b_11_xxx_111

+ #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)

+ # callq R86

+ ((REX_WRX? 0xff b_11_010_xxx) |

+ #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)

+ # jmpq R86

+ (REX_WRX? 0xff b_11_100_xxx))) |

- # and $~0x1f, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d

- (b_0100_0xx1 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6) 0xe0

- # add %r15, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d

- b_0100_11x1 0x03 (0xc7|0xcf|0xd7|0xdf|0xe7|0xef|0xf7)

- # callq %r8/%r9/%r10/%r11/%r12/%r13/%r14

- ((b_0100_xxx1 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6)) |

- # jmpq %r8/%r9/%r10/%r11/%r12/%r13/%r14

- (b_0100_xxx1 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6)))))

- @{

- instruction_start -= 7;

- if (RMFromModRM(instruction_start[2]) !=

- RegFromModRM(instruction_start[6]) ||

- RMFromModRM(instruction_start[2]) != RMFromModRM(*current_position))

- instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;

- BitmapClearBit(valid_targets, (instruction_start - data) + 4);

- BitmapClearBit(valid_targets, (instruction_start - data) + 7);

- restricted_register = NO_REG;

- };

+ # This block encodes call and jump superinstructions of the form:

+ # 0: 4_ 83 e_ e0 and $~0x1f,E64

+ # 4: 4_ 03 f_ add RBASE,R64

+ # 7: 4_ ff e_ jmpq *R64

+ #### INSTRUCTION ONE (four bytes)

+ # and $~0x1f, E64

+ (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0

+ #### INSTRUCTION TWO (three bytes)

+ # add RBASE, R64 (0x03 opcode)

+ b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111)

+ #### INSTRUCTION THREE: call (three bytes)

+ # callq R64

+ ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |

+ #### INSTRUCTION THREE: jmp (three bytes)

+ # jmpq R64

+ (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))

+ @{

+ ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected,

+ &instruction_start, current_position,

+ data, valid_targets);

+ };

# EMMS/SSE2/AVX instructions which have implicit %ds:(%rsi) operand

# maskmovq %mmX,%mmY

@@ -267,7 +361,7 @@

string_instruction_rsi_no_rdi =

(rep? 0xac | # lods %ds:(%rsi),%al

data16rep 0xad | # lods %ds:(%rsi),%ax

- rep? REXW_NONE? 0xad) ; # lods %ds:(%rsi),%eax/%rax

+ rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax

# String instructions which use only %ds:(%rdi)

string_instruction_rdi_no_rsi =

@@ -277,7 +371,7 @@

rep? 0xaa | # stos %al,%es:(%rdi)

data16rep 0xab | # stos %ax,%es:(%rdi)

- rep? REXW_NONE? 0xab ; # stos %eax/%rax,%es:(%rdi)

+ rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi)

# String instructions which use both %ds:(%rsi) and %ds:(%rdi)

string_instruction_rsi_rdi =

@@ -287,107 +381,107 @@

rep? 0xa4 | # movsb %es:(%rdi),%ds:(%rsi)

data16rep 0xa5 | # movsw %es:(%rdi),%ds:(%rsi)

- rep? REXW_NONE? 0xa5 ; # movs[lq] %es:(%rdi),%ds:(%rsi)

+ rep? REXW_NONE? 0xa5; # movs[lq] %es:(%rdi),%ds:(%rsi)

+ # Superinstructions which handle instructions which require sandboxed %rsi.

+ #

+ # There are two variants which handle spurious REX prefixes.

+ #

+ # Note that both “0x89 0xf6” and “0x8b 0xf6” encode “mov %esi,%esi”:

+ # “mov” with opcode “0x89” moves from “A” to “B” while “mov” with opcode

+ # “0x8b” moves from “B” to “A” but when “A” and “B” happen to denote the

+ # same register there is no functional difference between these opcodes.

sandbox_instruction_rsi_no_rdi =

- (0x89 | 0x8b) 0xf6 . # mov %esi,%esi

- 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi

+ (0x89 | 0x8b) 0xf6 # mov %esi,%esi

+ 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi

string_instruction_rsi_no_rdi

- instruction_start -= 6;

- BitmapClearBit(valid_targets, (instruction_start - data) + 2);

- BitmapClearBit(valid_targets, (instruction_start - data) + 6);

- restricted_register = NO_REG;

+ SandboxRxiSuperInstNoRexOnMov(&instruction_start, data, valid_targets);

} |

- REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi

- 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi

+ REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi

+ 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi

string_instruction_rsi_no_rdi

- instruction_start -= 7;

- BitmapClearBit(valid_targets, (instruction_start - data) + 3);

- BitmapClearBit(valid_targets, (instruction_start - data) + 7);

- restricted_register = NO_REG;

+ SandboxRxiSuperInstWithRexOnMov(&instruction_start, data, valid_targets);

};

+ # Superinstructions which handle instructions which require sandboxed %rdi.

+ #

+ # There are two variants which handle spurious REX prefixes.

+ #

+ # Note that both “0x89 0xff” and “0x8b 0xff” encode “mov %edi,%edi”:

+ # “mov” with opcode “0x89” moves from “A” to “B” while “mov” with opcode

+ # “0x8b” moves from “B” to “A” but when “A” and “B” happen to denote the

+ # same register there is no functional difference between these opcodes.

sandbox_instruction_rdi_no_rsi =

- (0x89 | 0x8b) 0xff . # mov %edi,%edi

- 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi

+ (0x89 | 0x8b) 0xff # mov %edi,%edi

+ 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi

(string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)

- instruction_start -= 6;

- BitmapClearBit(valid_targets, (instruction_start - data) + 2);

- BitmapClearBit(valid_targets, (instruction_start - data) + 6);

- restricted_register = NO_REG;

+ SandboxRxiSuperInstNoRexOnMov(&instruction_start, data, valid_targets);

} |

- REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi

- 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi

+ REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi

+ 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi

(string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)

- instruction_start -= 7;

- BitmapClearBit(valid_targets, (instruction_start - data) + 3);

- BitmapClearBit(valid_targets, (instruction_start - data) + 7);

- restricted_register = NO_REG;

+ SandboxRxiSuperInstWithRexOnMov(&instruction_start, data, valid_targets);

};

- # String instructions which use both %ds:(%rsi) and %ds:(%rdi)

+ # Superinstructions which handle instructions which require both sandboxed %rsi

+ # and sandboxed %rdi.

+ #

+ # There are four variants which handle spurious REX prefixes.

+ #

+ # Note that both “0x89 0xf6” and “0x8b 0xf6” encode “mov %esi,%esi” while both

+ # “0x89 0xff” and “0x8b 0xff” encode “mov %edi,%edi”: “mov” with opcode “0x89”

+ # moves from “A” to “B” while “mov” with opcode “0x8b” moves from “B” to “A”

+ # but when “A” and “B” happen to denote the same register there is no

+ # functional difference between these opcodes.

+ #

+ # Note: we call SandboxRxiSuperInst in actions here twice because we have two

+ # sandboxings here - one for %rsi and one for %rdi.

sandbox_instruction_rsi_rdi =

- (0x89 | 0x8b) 0xf6 . # mov %esi,%esi

- 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi

- (0x89 | 0x8b) 0xff . # mov %edi,%edi

- 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi

+ (0x89 | 0x8b) 0xf6 # mov %esi,%esi

+ 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi

+ (0x89 | 0x8b) 0xff # mov %edi,%edi

+ 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi

string_instruction_rsi_rdi

- instruction_start -= 12;

- BitmapClearBit(valid_targets, (instruction_start - data) + 2);

- BitmapClearBit(valid_targets, (instruction_start - data) + 6);

- BitmapClearBit(valid_targets, (instruction_start - data) + 8);

- BitmapClearBit(valid_targets, (instruction_start - data) + 12);

- restricted_register = NO_REG;

+ SandboxRxiSuperInstNoRexOnMov(&instruction_start, data, valid_targets);

} |

- (0x89 | 0x8b) 0xf6 . # mov %esi,%esi

- 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi

- REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi

- 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi

+ (0x89 | 0x8b) 0xf6 # mov %esi,%esi

+ 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi

+ REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi

+ 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi

string_instruction_rsi_rdi

- instruction_start -= 13;

- BitmapClearBit(valid_targets, (instruction_start - data) + 2);

- BitmapClearBit(valid_targets, (instruction_start - data) + 6);

- BitmapClearBit(valid_targets, (instruction_start - data) + 9);

- BitmapClearBit(valid_targets, (instruction_start - data) + 13);

- restricted_register = NO_REG;

+ SandboxRxiSuperInstWithRexOnMov(&instruction_start, data, valid_targets);

+ SandboxRxiSuperInstNoRexOnMov(&instruction_start, data, valid_targets);

} |

- REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi

- 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi

- (0x89 | 0x8b) 0xff . # mov %edi,%edi

- 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi

+ REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi

+ 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi

+ (0x89 | 0x8b) 0xff # mov %edi,%edi

+ 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi

string_instruction_rsi_rdi

- instruction_start -= 13;

- BitmapClearBit(valid_targets, (instruction_start - data) + 3);

- BitmapClearBit(valid_targets, (instruction_start - data) + 7);

- BitmapClearBit(valid_targets, (instruction_start - data) + 9);

- BitmapClearBit(valid_targets, (instruction_start - data) + 13);

- restricted_register = NO_REG;

+ SandboxRxiSuperInstNoRexOnMov(&instruction_start, data, valid_targets);

+ SandboxRxiSuperInstWithRexOnMov(&instruction_start, data, valid_targets);

} |

- REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi

- 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi

- REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi

- 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi

+ REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi

+ 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi

+ REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi

+ 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi

string_instruction_rsi_rdi

- instruction_start -= 14;

- BitmapClearBit(valid_targets, (instruction_start - data) + 3);

- BitmapClearBit(valid_targets, (instruction_start - data) + 7);

- BitmapClearBit(valid_targets, (instruction_start - data) + 10);

- BitmapClearBit(valid_targets, (instruction_start - data) + 14);

- restricted_register = NO_REG;

+ SandboxRxiSuperInstWithRexOnMov(&instruction_start, data, valid_targets);

};

special_instruction =

@@ -399,6 +493,9 @@

sandbox_instruction_rsi_no_rdi |

sandbox_instruction_rdi_no_rsi |

sandbox_instruction_rsi_rdi)

+ # Mark the instruction as special—currently this information is used only in

+ # tests, but in the future we may use it for dynamic code modification

+ # support.

instruction_info_collected |= SPECIAL_INSTRUCTION;

};

@@ -406,7 +503,10 @@

# Remove special instructions which are only allowed in special cases.

normal_instruction = one_instruction - special_instruction;

- # Check if call is properly aligned

+ # Check if call is properly aligned.

+ #

+ # For direct calls we explicitly encode all variations. For indirect calls

+ # we accept all the special instructions which end with an indirect call.

call_alignment =

((normal_instruction &

# Direct call

@@ -417,6 +517,8 @@

# Indirect call

(any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* &

(modrm_memory | modrm_registers)))))

+ # Call instruction must be aligned to the end of the bundle. Previously this

+ # was a strict requirement; today it's just a warning to aid with debugging.

if (((current_position - data) & kBundleMask) != kBundleMask)

instruction_info_collected |= BAD_CALL_ALIGNMENT;

@@ -424,9 +526,18 @@

main := ((call_alignment | normal_instruction | special_instruction)

+ # Beginning of the instruction is always valid target for jump. If this

+ # instruction is, in fact, part of the superinstruction then we'll clear

+ # that bit later.

- BitmapSetBit(valid_targets, current_position - data);

+ MakeJumpTargetValid(current_position - data, valid_targets);

}

+ # Here we call the user callback if there are validation errors or if the

+ # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used.

+ #

+ # After that we move instruction_start and clean all the variables which

+ # are only used in the processing of a single instruction (prefixes, operand

+ # states and instruction_info_collected).

if ((instruction_info_collected & VALIDATION_ERRORS_MASK) ||

(options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) {

@@ -442,6 +553,7 @@

instruction_start = current_position + 1;

instruction_info_collected = 0;

SET_REX_PREFIX(FALSE);

+ /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */

SET_VEX_PREFIX2(0xe0);

SET_VEX_PREFIX3(0x00);

operand_states = 0;

« src/trusted/validator_ragel/validator_x86_32.rl ('K') | « src/trusted/validator_ragel/validator_x86_32.rl ('k') | no next file » | no next file with comments »