Chromium Code Reviews| Index: src/trusted/validator_ragel/validator_x86_64.rl |
| =================================================================== |
| --- src/trusted/validator_ragel/validator_x86_64.rl (revision 9911) |
| +++ src/trusted/validator_ragel/validator_x86_64.rl (working copy) |
| @@ -11,7 +11,7 @@ |
| #include <stdlib.h> |
| #include <string.h> |
| -#include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h" |
| +#include "native_client/src/trusted/validator_ragel/validator_internal.h" |
| %%{ |
| machine x86_64_validator; |
| @@ -106,6 +106,10 @@ |
| data16rep = (data16 | rep data16 | data16 rep); |
| # Special %rbp modifications without required sandboxing |
|
Brad Chen
2012/10/04 17:26:04
This comment is unclear. Why don't these special m
khim
2012/10/05 08:22:53
These special instructions don't require the sandb
|
| + # |
| + # Note that there are two different opcodes for “mov”: “mov” with opcode |
| + # “0x89” moves from “A” to “B” while “mov” with opcode “0x8b” moves from |
| + # “B” to “A”. |
| rbp_modifications = |
| (b_0100_10x0 0x89 0xe5) | # mov %rsp,%rbp |
| (b_0100_10x0 0x8b 0xec) # | mov %rsp,%rbp |
| @@ -124,10 +128,14 @@ |
| else |
| instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; |
| restricted_register = NO_REG; |
| - BitmapClearBit(valid_targets, (instruction_start - data)); |
| + MakeJumpTargetInvalid((instruction_start - data), valid_targets); |
|
Brad Chen
2012/10/04 17:26:04
Are you going to make the instruction at the targe
khim
2012/10/05 08:22:53
I have no opinion WRT to the best name. We clear t
|
| }; |
| # Special %rsp modifications without required sandboxing |
|
Brad Chen
2012/10/04 17:26:04
This line begins a section that is extremely simil
khim
2012/10/05 08:22:53
Actually this is just bad comment. Previous one de
|
| + # |
| + # Note that there are two different opcodes for “mov”: “mov” with opcode |
| + # “0x89” moves from “A” to “B” while “mov” with opcode “0x8b” moves from |
| + # “B” to “A”. |
| rsp_modifications = |
| (b_0100_10x0 0x89 0xec) | # mov %rbp,%rsp |
| (b_0100_10x0 0x8b 0xe5) | # mov %rbp,%rsp |
| @@ -147,103 +155,204 @@ |
| else |
| instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; |
| restricted_register = NO_REG; |
| - BitmapClearBit(valid_targets, (instruction_start - data)); |
| + MakeJumpTargetInvalid((instruction_start - data), valid_targets); |
| }; |
| - # naclcall or nacljmp. Note: first "and $~0x1f, %eXX" is a normal instruction |
| - # and as such will detect case where %rbp/%rsp is illegally modified. |
| + # naclcall or nacljmp. These are indirect-jump sequences. They include three |
|
Brad Chen
2012/10/04 17:26:04
Before getting into any specific instructions ther
khim
2012/10/05 08:22:53
This is validator_internals.html.
|
| + # commands: |
|
Brad Chen
2012/10/04 17:26:04
How about "These are three-instruction indirection
khim
2012/10/05 08:22:53
Done.
|
| + # and “and $~0x1f, %eXX” |
|
Brad Chen
2012/10/04 17:26:04
Why repeat the opcode "and" helpful in these comme
khim
2012/10/05 08:22:53
Tried to make easier to see that third command may
|
| + # add “add RBASE, %rXX” |
| + # jmpq “*%rXX” or callq “*%rXX” |
| + # Note: the first instruction, "and $~0x1f, %eXX", is a normal instruction and |
| + # as such will detect the case where %rbp/%rsp is illegally modified. |
|
Brad Chen
2012/10/04 17:26:04
Who will detect this case? I don't understand the
khim
2012/10/05 08:22:53
When this ragel machine will be combined with norm
|
| + # |
| + # There are a number of variants present which differ in their REX prefix usage: |
|
Brad Chen
2012/10/04 17:26:04
Where are the variants?
khim
2012/10/05 08:22:53
Below. Should I write that?
|
| + # we need to make sure “%eXX” in “and”, “%rXX” in “add”, and “%rXX” in “jmpq” |
| + # or “callq” are the same register, and that is much simpler to do if each |
| + # action handles only a fixed number of bytes. |
| + # |
| + # An additional complication arises because x86-64 has two different “add” |
| + # instructions: with the “0x01” and “0x03” opcodes. Both are used in practice, |
|
Brad Chen
2012/10/04 17:26:04
Please avoid colorful language such as "in use in
khim
2012/10/05 08:22:53
The only question is: how to determine if I wrote
|
| + # thus we cannot support just one form. They differ in the direction used: |
| + # both can add “A” and “B” but one of them stores the result in “A” and the other |
| + # stores the result in “B” (see the AMD/Intel manuals for clarification). |
| + # |
| + # REGISTER USAGE ABBREVIATIONS: |
| + # E32: legacy ia32 registers (all eight: %eax to %edi) |
| + # R32: 64-bit counterparts for legacy 386 registers (%rax to %rdi) |
| + # R64: new amd64 registers (only seven: %r8 to %r14) |
| + # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) |
|
Brad Chen
2012/10/04 17:26:04
You have defined R32 twice. I think you mean "E32"
khim
2012/10/05 08:22:53
Yes, and done.
|
| + # RBASE: %r15 (used as “base of untrusted world” in NaCl for amd64) |
| naclcall_or_nacljmp = |
| - # and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi |
| + # This block encodes call and jump instructions of the form: |
| + # 0: 83 e_ e0 and $~0x1f,E32 |
| + # 3: 4_ 01 f_ add RBASE,R32 |
| + # 6: ff e_ jmpq *R32 |
| + #### INSTRUCTION ONE (three bytes) |
| + # and $~0x1f, E32 |
| (0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0 |
| - # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
| + #### INSTRUCTION TWO (three bytes) |
| + # add RBASE, R32 (0x01 opcode) |
| b_0100_11x0 0x01 (0xf8|0xf9|0xfa|0xfb|0xfc|0xfd|0xfe|0xff) |
| - # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
| + #### INSTRUCTION THREE.c (two bytes plus optional REX prefix) |
|
Brad Chen
2012/10/04 17:26:04
three or two bytes?
khim
2012/10/05 08:22:53
Done.
|
| + # callq R32 |
| ((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) | |
| - # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
| + #### INSTRUCTION THREE.j (two bytes plus optional REX prefix) |
|
Brad Chen
2012/10/04 17:26:04
Shouldn't 'three' be 'two' here too? Looks like 'j
khim
2012/10/05 08:22:53
Replaced "j" with "jmp".
Now it's "INSTRUCTION TH
|
| + # jmpq R32 |
| (REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7)))) |
| + # This action first compares register numbers in three atomic instructions |
|
Brad Chen
2012/10/04 17:26:04
What do you mean by 'atomic instructions'? That me
khim
2012/10/05 08:22:53
atomic -> component.
|
| + # described above, then it redefines the range of the super-instruction to |
| + # include the preceding sandboxing sequence and invalidates jump targets on |
| + # the interior of the super-instructions and finally clears “the restricted |
| + # register” variable. |
| + # |
| + # “Magic numbers” correspond to the structure of this particular variant of |
| + # the superinstruction. |
|
Brad Chen
2012/10/04 17:26:04
Please be consistent; use either 'super-instructio
khim
2012/10/05 08:22:53
Done.
|
| @{ |
| instruction_start -= 6; |
| if (RMFromModRM(instruction_start[1]) != |
| RMFromModRM(instruction_start[5]) || |
| RMFromModRM(instruction_start[1]) != RMFromModRM(*current_position)) |
| instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 3); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 6); |
| + MakeJumpTargetInvalid((instruction_start - data) + 3, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 6, valid_targets); |
| restricted_register = NO_REG; |
| } | |
| - # and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi |
| + # This block encodes call and jump instructions of the form: |
| + # 0: 83 e_ e0 and $~0x1f,E32 |
| + # 3: 4_ 03 f_ add RBASE,R32 |
| + # 6: ff e_ jmpq *R32 |
| + #### INSTRUCTION ONE (three bytes) |
| + # and $~0x1f, E32 |
| (0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0 |
| - # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
| + #### INSTRUCTION TWO (three bytes) |
| + # add RBASE, R32 (0x03 opcode) |
| b_0100_10x1 0x03 (0xc7|0xcf|0xd7|0xdf|0xe7|0xef|0xf7|0xff) |
| - # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
| + #### INSTRUCTION THREE.c (two bytes plus optional REX prefix) |
|
Brad Chen
2012/10/04 17:26:04
Here again I think you mean 'two' instead of 'thre
khim
2012/10/05 08:22:53
Done.
|
| + # callq R32 |
| ((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) | |
| - # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
| + #### INSTRUCTION THREE.j (two bytes plus optional REX prefix) |
| + # jmpq R32 |
| (REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7)))) |
| + # This action first compares register numbers in three atomic instructions |
| + # described above, then it redefines the range of the super-instruction to |
| + # include the preceding sandboxing sequence and invalidates jump targets on |
| + # the interior of the super-instructions and finally clears “the restricted |
| + # register” variable. |
| + # |
| + # “Magic numbers” correspond to the structure of this particular variant of |
| + # the superinstruction. |
|
Brad Chen
2012/10/04 17:26:04
This action is identical to the previous action. P
khim
2012/10/05 08:22:53
All actions are different: where actions are ident
|
| @{ |
| instruction_start -= 6; |
| if (RMFromModRM(instruction_start[1]) != |
| RegFromModRM(instruction_start[5]) || |
| RMFromModRM(instruction_start[1]) != RMFromModRM(*current_position)) |
| instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 3); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 6); |
| + MakeJumpTargetInvalid((instruction_start - data) + 3, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 6, valid_targets); |
| restricted_register = NO_REG; |
| } | |
| - # rex.R?X? and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi |
| + # This block encodes call and jump instructions of the form: |
| + # 0: 4_ 83 e_ e0 and $~0x1f,E32 |
| + # 4: 4_ 01 f_ add RBASE,R32 |
| + # 7: ff e_ jmpq *R32 |
| + #### INSTRUCTION ONE (four bytes) |
| + # and $~0x1f, E32 |
| ((REX_RX 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0 |
| - # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
| + #### INSTRUCTION TWO (three bytes) |
| + # add RBASE, R32 (0x01 opcode) |
| b_0100_11x0 0x01 (0xf8|0xf9|0xfa|0xfb|0xfc|0xfd|0xfe|0xff) |
| - # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
| + #### INSTRUCTION THREE.c (two bytes plus optional REX prefix) |
| + # callq R32 |
| ((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) | |
| - # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
| + # jmpq R32 |
| (REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7)))) | |
| - # and $~0x1f, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d |
| + # This block encodes call and jump instructions of the form: |
| + # 0: 4_ 83 e_ e0 and $~0x1f,E64 |
| + # 4: 4_ 01 f_ add RBASE,R64 |
| + # 7: 4_ ff e_ jmpq *R64 |
| + #### INSTRUCTION ONE (four bytes) |
| + # and $~0x1f, E64 |
| (b_0100_0xx1 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6) 0xe0 |
| - # add %r15, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d |
| + #### INSTRUCTION TWO (three bytes) |
| + # add RBASE, R64 (0x01 opcode) |
| b_0100_11x1 0x01 (0xf8|0xf9|0xfa|0xfb|0xfc|0xfd|0xfe) |
| - # callq %r8/%r9/%r10/%r11/%r12/%r13/%r14 |
| + #### INSTRUCTION THREE.c (three bytes) |
| + # callq R64 |
| ((b_0100_xxx1 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6)) | |
| - # jmpq %r8/%r9/%r10/%r11/%r12/%r13/%r14 |
| + #### INSTRUCTION THREE.j (three bytes) |
| + # jmpq R64 |
| (b_0100_xxx1 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6))))) |
| + # This action first compares register numbers in three atomic instructions |
| + # described above, then it redefines the range of the super-instruction to |
| + # include the preceding sandboxing sequence and invalidates jump targets on |
| + # the interior of the super-instructions and finally clears “the restricted |
| + # register” variable. |
| + # |
| + # “Magic numbers” correspond to the structure of this particular variant of |
| + # the superinstruction (we group actions with identical “magic numbers”). |
| @{ |
|
Brad Chen
2012/10/04 17:26:04
This is much too repetitive. You are increasing my
khim
2012/10/05 08:22:53
You can not make the description of the instructio
|
| instruction_start -= 7; |
| if (RMFromModRM(instruction_start[2]) != |
| RMFromModRM(instruction_start[6]) || |
| RMFromModRM(instruction_start[2]) != RMFromModRM(*current_position)) |
| instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 4); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 7); |
| + MakeJumpTargetInvalid((instruction_start - data) + 4, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 7, valid_targets); |
| restricted_register = NO_REG; |
| } | |
| - # rex.R?X? and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi |
| + # This block encodes call and jump instructions of the form: |
|
Brad Chen
2012/10/04 17:26:04
Here I think you mean "super-instruction" not "ins
khim
2012/10/05 08:22:53
Done.
|
| + # 0: 4_ 83 e_ e0 and $~0x1f,E32 |
| + # 4: 4_ 03 f_ add RBASE,R32 |
| + # 7: ff e_ jmpq *R32 |
| + #### INSTRUCTION ONE (four bytes) |
| + # and $~0x1f, E32 |
| ((REX_RX 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0 |
| - # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
| + #### INSTRUCTION TWO (three bytes) |
| + # add RBASE, R32 (0x03 opcode) |
| b_0100_10x1 0x03 (0xc7|0xcf|0xd7|0xdf|0xe7|0xef|0xf7|0xff) |
| - # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
| + #### INSTRUCTION THREE.c (two bytes plus optional REX prefix) |
|
Brad Chen
2012/10/04 17:26:04
'two'?
khim
2012/10/05 08:22:53
Done.
|
| + # callq R32 |
| ((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) | |
| - # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
| + # jmpq R32 |
| (REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7)))) | |
|
Brad Chen
2012/10/04 17:26:04
It's really hard to figure out what parenthetical
khim
2012/10/05 08:22:53
Indeed: indentation was mixed up, sorry.
Fixed.
|
| - # and $~0x1f, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d |
| + # This block encodes call and jump instructions of the form: |
| + # 0: 4_ 83 e_ e0 and $~0x1f,E64 |
| + # 4: 4_ 03 f_ add RBASE,R64 |
| + # 7: 4_ ff e_ jmpq *R64 |
| + #### INSTRUCTION ONE (four bytes) |
| + # and $~0x1f, E64 |
| (b_0100_0xx1 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6) 0xe0 |
| - # add %r15, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d |
| + #### INSTRUCTION TWO (three bytes) |
| + # add RBASE, R64 (0x03 opcode) |
| b_0100_11x1 0x03 (0xc7|0xcf|0xd7|0xdf|0xe7|0xef|0xf7) |
| - # callq %r8/%r9/%r10/%r11/%r12/%r13/%r14 |
| + #### INSTRUCTION THREE.c (three bytes) |
| + # callq R64 |
| ((b_0100_xxx1 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6)) | |
| - # jmpq %r8/%r9/%r10/%r11/%r12/%r13/%r14 |
| + #### INSTRUCTION THREE.j (three bytes) |
| + # jmpq R64 |
| (b_0100_xxx1 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6))))) |
| + # This action first compares register numbers in three atomic instructions |
|
Brad Chen
2012/10/04 17:26:04
Every place you find yourself repeating a comment
khim
2012/10/05 08:22:53
Done.
|
| + # described above, then it redefines the range of the super-instruction to |
| + # include the preceding sandboxing sequence and invalidates jump targets on |
| + # the interior of the super-instructions and finally clears “the restricted |
| + # register” variable. |
| + # |
| + # “Magic numbers” correspond to the structure of this particular variant of |
| + # the superinstruction (we group actions with identical “magic numbers”). |
| @{ |
| instruction_start -= 7; |
| if (RMFromModRM(instruction_start[2]) != |
| RegFromModRM(instruction_start[6]) || |
| RMFromModRM(instruction_start[2]) != RMFromModRM(*current_position)) |
| instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 4); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 7); |
| + MakeJumpTargetInvalid((instruction_start - data) + 4, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 7, valid_targets); |
| restricted_register = NO_REG; |
| }; |
| @@ -289,62 +398,122 @@ |
| data16rep 0xa5 | # movsw %es:(%rdi),%ds:(%rsi) |
| rep? REXW_NONE? 0xa5 ; # movs[lq] %es:(%rdi),%ds:(%rsi) |
| + # Superinstruction which handles instructions that require a sandboxed %rsi. |
| + # |
| + # There are two variants which handle spurious REX prefixes. |
|
Brad Chen
2012/10/04 17:26:04
In what sense are the REX prefixes spurious? This
khim
2012/10/05 08:22:53
They don't change the meaning of the instruction b
|
| + # |
| + # Note that both “0x89 0xf6” and “0x8b 0xf6” encode “mov %esi,%esi”: |
| + # “mov” with opcode “0x89” moves from “A” to “B” while “mov” with opcode |
| + # “0x8b” moves from “B” to “A” but when “A” and “B” happen to denote the |
| + # same register there is no functional difference between these opcodes. |
| sandbox_instruction_rsi_no_rdi = |
| (0x89 | 0x8b) 0xf6 . # mov %esi,%esi |
|
Brad Chen
2012/10/04 17:26:04
What does '.' mean here? How is it different from
khim
2012/10/05 08:22:53
"." means concatenation which is default action, t
|
| 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi |
| string_instruction_rsi_no_rdi |
| + # This action redefines the range of the super-instruction to include the |
|
Brad Chen
2012/10/04 17:26:04
This same identical comment is repeated at least f
khim
2012/10/05 08:22:53
Done.
|
| + # preceding sandboxing sequence then invalidates jump targets on the |
| + # interior of the super-instructions and finally clears “the restricted |
| + # register” variable. |
| + # |
| + # “Magic numbers” correspond to the structure of this particular variant of |
| + # the superinstruction. |
| @{ |
| instruction_start -= 6; |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 2); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 6); |
| + MakeJumpTargetInvalid((instruction_start - data) + 2, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 6, valid_targets); |
| restricted_register = NO_REG; |
| } | |
| REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi |
| 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi |
| string_instruction_rsi_no_rdi |
| + # This action redefines the range of the super-instruction to include the |
| + # preceding sandboxing sequence then invalidates jump targets on the |
| + # interior of the super-instructions and finally clears “the restricted |
| + # register” variable. |
| + # |
| + # “Magic numbers” correspond to the structure of this particular variant of |
| + # the superinstruction. |
| @{ |
| instruction_start -= 7; |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 3); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 7); |
| + MakeJumpTargetInvalid((instruction_start - data) + 3, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 7, valid_targets); |
| restricted_register = NO_REG; |
| }; |
| + # Superinstruction which handles instructions that require a sandboxed %rdi. |
| + # |
| + # There are two variants which handle spurious REX prefixes. |
| + # |
| + # Note that both “0x89 0xff” and “0x8b 0xff” encode “mov %edi,%edi”: |
| + # “mov” with opcode “0x89” moves from “A” to “B” while “mov” with opcode |
| + # “0x8b” moves from “B” to “A” but when “A” and “B” happen to denote the |
| + # same register there is no functional difference between these opcodes. |
| sandbox_instruction_rdi_no_rsi = |
| (0x89 | 0x8b) 0xff . # mov %edi,%edi |
| 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
| (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) |
| + # This action redefines the range of the super-instruction to include the |
| + # preceding sandboxing sequence then invalidates jump targets on the |
| + # interior of the super-instructions and finally clears “the restricted |
| + # register” variable. |
| + # |
| + # “Magic numbers” correspond to the structure of this particular variant of |
| + # the superinstruction. |
| @{ |
| instruction_start -= 6; |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 2); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 6); |
| + MakeJumpTargetInvalid((instruction_start - data) + 2, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 6, valid_targets); |
| restricted_register = NO_REG; |
| } | |
| REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
| 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
| (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) |
| + # This action redefines the range of the super-instruction to include the |
| + # preceding sandboxing sequence then invalidates jump targets on the |
| + # interior of the super-instructions and finally clears “the restricted |
| + # register” variable. |
| + # |
| + # “Magic numbers” correspond to the structure of this particular variant of |
| + # the superinstruction. |
| @{ |
| instruction_start -= 7; |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 3); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 7); |
| + MakeJumpTargetInvalid((instruction_start - data) + 3, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 7, valid_targets); |
| restricted_register = NO_REG; |
| }; |
| - # String instructions which use both %ds:(%rsi) and %ds:(%rdi) |
| + # Superinstruction which handles instructions that require both a sandboxed %rsi |
| + # and a sandboxed %rdi. |
| + # |
| + # There are four variants which handle spurious REX prefixes. |
| + # |
| + # Note that both “0x89 0xf6” and “0x8b 0xf6” encode “mov %esi,%esi” while both |
| + # “0x89 0xff” and “0x8b 0xff” encode “mov %edi,%edi”: “mov” with opcode “0x89” |
| + # moves from “A” to “B” while “mov” with opcode “0x8b” moves from “B” to “A” |
| + # but when “A” and “B” happen to denote the same register there is no |
| + # functional difference between these opcodes. |
| sandbox_instruction_rsi_rdi = |
| (0x89 | 0x8b) 0xf6 . # mov %esi,%esi |
| 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi |
| (0x89 | 0x8b) 0xff . # mov %edi,%edi |
| 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
| string_instruction_rsi_rdi |
| + # This action redefines the range of the super-instruction to include the |
| + # preceding sandboxing sequence then invalidates jump targets on the |
| + # interior of the super-instructions and finally clears “the restricted |
| + # register” variable. |
| + # |
| + # “Magic numbers” correspond to the structure of this particular variant of |
| + # the superinstruction. |
| @{ |
| instruction_start -= 12; |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 2); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 6); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 8); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 12); |
| + MakeJumpTargetInvalid((instruction_start - data) + 2, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 6, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 8, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 12, valid_targets); |
| restricted_register = NO_REG; |
| } | |
| @@ -353,12 +522,19 @@ |
| REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
| 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
| string_instruction_rsi_rdi |
| + # This action redefines the range of the super-instruction to include the |
| + # preceding sandboxing sequence then invalidates jump targets on the |
| + # interior of the super-instructions and finally clears “the restricted |
| + # register” variable. |
| + # |
| + # “Magic numbers” correspond to the structure of this particular variant of |
| + # the superinstruction. |
| @{ |
| instruction_start -= 13; |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 2); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 6); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 9); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 13); |
| + MakeJumpTargetInvalid((instruction_start - data) + 2, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 6, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 9, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 13, valid_targets); |
| restricted_register = NO_REG; |
| } | |
| @@ -367,12 +543,19 @@ |
| (0x89 | 0x8b) 0xff . # mov %edi,%edi |
| 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
| string_instruction_rsi_rdi |
| + # This action redefines the range of the super-instruction to include the |
| + # preceding sandboxing sequence then invalidates jump targets on the |
| + # interior of the super-instructions and finally clears “the restricted |
| + # register” variable. |
| + # |
| + # “Magic numbers” correspond to the structure of this particular variant of |
| + # the superinstruction. |
| @{ |
| instruction_start -= 13; |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 3); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 7); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 9); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 13); |
| + MakeJumpTargetInvalid((instruction_start - data) + 3, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 7, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 9, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 13, valid_targets); |
| restricted_register = NO_REG; |
| } | |
| @@ -381,12 +564,19 @@ |
| REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
| 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
| string_instruction_rsi_rdi |
| + # This action redefines the range of the super-instruction to include the |
| + # preceding sandboxing sequence then invalidates jump targets on the |
| + # interior of the super-instructions and finally clears “the restricted |
| + # register” variable. |
| + # |
| + # “Magic numbers” correspond to the structure of this particular variant of |
| + # the superinstruction. |
| @{ |
| instruction_start -= 14; |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 3); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 7); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 10); |
| - BitmapClearBit(valid_targets, (instruction_start - data) + 14); |
| + MakeJumpTargetInvalid((instruction_start - data) + 3, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 7, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 10, valid_targets); |
| + MakeJumpTargetInvalid((instruction_start - data) + 14, valid_targets); |
| restricted_register = NO_REG; |
| }; |
| @@ -406,7 +596,10 @@ |
| # Remove special instructions which are only allowed in special cases. |
| normal_instruction = one_instruction - special_instruction; |
| - # Check if call is properly aligned |
| + # Check if call is properly aligned. |
| + # |
| + # For a direct call we explicitly encode all variations. For an indirect call |
| + # we accept all the special instructions which end with an indirect call. |
| call_alignment = |
| ((normal_instruction & |
| # Direct call |
| @@ -425,7 +618,7 @@ |
| main := ((call_alignment | normal_instruction | special_instruction) |
| >{ |
| - BitmapSetBit(valid_targets, current_position - data); |
| + MakeJumpTargetValid(current_position - data, valid_targets); |
| } |
| @{ |
| if ((instruction_info_collected & |
| @@ -443,6 +636,7 @@ |
| instruction_start = current_position + 1; |
| instruction_info_collected = 0; |
| SET_REX_PREFIX(FALSE); |
| + /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ |
| SET_VEX_PREFIX2(0xe0); |
| SET_VEX_PREFIX3(0x00); |
| operand_states = 0; |
| @@ -460,7 +654,7 @@ |
| Bool ValidateChunkAMD64(const uint8_t *data, size_t size, |
| enum validation_options options, |
| const NaClCPUFeaturesX86 *cpu_features, |
| - validation_callback_func user_callback, |
| + ValidationCallbackFunc user_callback, |
| void *callback_data) { |
| bitmap_word valid_targets_small; |
| bitmap_word jump_dests_small; |
| @@ -507,9 +701,9 @@ |
| * 2 bits for register kinds, |
| * 5 bits for register numbers (16 regs plus RIZ). */ |
| uint32_t operand_states = 0; |
| - enum register_name base = NO_REG; |
| - enum register_name index = NO_REG; |
| - enum register_name restricted_register = NO_REG; |
| + enum OperandName base = NO_REG; |
| + enum OperandName index = NO_REG; |
| + enum OperandName restricted_register = NO_REG; |
| uint8_t rex_prefix = FALSE; |
| uint8_t vex_prefix2 = 0xe0; |
| uint8_t vex_prefix3 = 0x00; |