Index: src/trusted/validator_ragel/validator_x86_64.rl |
=================================================================== |
--- src/trusted/validator_ragel/validator_x86_64.rl (revision 9911) |
+++ src/trusted/validator_ragel/validator_x86_64.rl (working copy) |
@@ -11,7 +11,7 @@ |
#include <stdlib.h> |
#include <string.h> |
-#include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h" |
+#include "native_client/src/trusted/validator_ragel/validator_internal.h" |
%%{ |
machine x86_64_validator; |
@@ -106,6 +106,10 @@ |
data16rep = (data16 | rep data16 | data16 rep); |
# Special %rbp modifications without required sandboxing |
Brad Chen
2012/10/04 17:26:04
This comment is unclear. Why don't these special m
khim
2012/10/05 08:22:53
These special instructions don't require the sandb
|
+ # |
+ # Note that there are two different opcodes for “mov”: “mov” with opcode |
+ # “0x89” moves from “A” to “B” while “mov” with opcode “0x8b” moves from |
+ # “B” to “A”. |
rbp_modifications = |
(b_0100_10x0 0x89 0xe5) | # mov %rsp,%rbp |
(b_0100_10x0 0x8b 0xec) # | mov %rsp,%rbp |
@@ -124,10 +128,14 @@ |
else |
instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; |
restricted_register = NO_REG; |
- BitmapClearBit(valid_targets, (instruction_start - data)); |
+ MakeJumpTargetInvalid((instruction_start - data), valid_targets); |
Brad Chen
2012/10/04 17:26:04
Are you going to make the instruction at the targe
khim
2012/10/05 08:22:53
I have no opinion WRT to the best name. We clear t
|
}; |
# Special %rbp modifications without required sandboxing |
Brad Chen
2012/10/04 17:26:04
This line begins a section that is extremely simil
khim
2012/10/05 08:22:53
Actually this is just bad comment. Previous one de
|
+ # |
+ # Note that there are two different opcodes for “mov”: “mov” with opcode |
+ # “0x89” moves from “A” to “B” while “mov” with opcode “0x8b” moves from |
+ # “B” to “A”. |
rsp_modifications = |
(b_0100_10x0 0x89 0xec) | # mov %rbp,%rsp |
(b_0100_10x0 0x8b 0xe5) | # mov %rbp,%rsp |
@@ -147,103 +155,204 @@ |
else |
instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; |
restricted_register = NO_REG; |
- BitmapClearBit(valid_targets, (instruction_start - data)); |
+ MakeJumpTargetInvalid((instruction_start - data), valid_targets); |
}; |
- # naclcall or nacljmp. Note: first "and $~0x1f, %eXX" is a normal instruction |
- # and as such will detect case where %rbp/%rsp is illegally modified. |
+ # naclcall or nacljmp. These are indirect-jump sequences. They include three |
Brad Chen
2012/10/04 17:26:04
Before getting into any specific instructions ther
khim
2012/10/05 08:22:53
This is validator_internals.html.
|
+ # commands: |
Brad Chen
2012/10/04 17:26:04
How about "These are three-instruction indirection
khim
2012/10/05 08:22:53
Done.
|
+ # and “$~0x1f, %eXX” |
Brad Chen
2012/10/04 17:26:04
Why repeat the opcode "and" helpful in these comme
khim
2012/10/05 08:22:53
Tried to make easier to see that third command may
|
+ # add “RBASE, %rXX” |
+ # jmpq “*%rXX” or callq “*%rXX” |
+ # Note: first "and $~0x1f, %eXX" is a normal instruction and as such will |
+ # detect case where %rbp/%rsp is illegally modified. |
Brad Chen
2012/10/04 17:26:04
Who will detect this case? I don't understand the
khim
2012/10/05 08:22:53
When this ragel machine will be combined with norm
|
+ # |
+ # There are a number of variants present which differ by the REX prefix usage: |
Brad Chen
2012/10/04 17:26:04
Where are the variants?
khim
2012/10/05 08:22:53
Below. Should I write that?
|
+ # we need to make sure “%eXX” in “and”, “%rXX” in “add”, and “%rXX” in “jmpq” |
+ # or “callq” are the same register and it's much simpler to do if one single |
+ # action handles only a fixed number of bytes. |
+ # |
+ # An additional complication arises because x86-64 contains two different “add” |
+ # instructions: with “0x01” and “0x03” opcodes. Both are in use in the wild |
Brad Chen
2012/10/04 17:26:04
Please avoid colorful language such as "in use in
khim
2012/10/05 08:22:53
The only question is: how to determine if I wrote
|
+ # thus we can not support just one form. They differ in the direction used: |
+ # both can add “A” and “B” but one of them stores the result in “A” and other |
+ # stores the result in “B” (see AMD/Intel manual for clarification). |
+ # |
+ # REGISTER USAGE ABBREVIATIONS: |
+ # E32: legacy ia32 registers (all eight: %eax to %edi) |
+ # R32: 64-bit counterparts for legacy 386 registers (%rax to %rdi) |
+ # R64: new amd64 registers (only seven: %r8 to %r14) |
+ # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) |
Brad Chen
2012/10/04 17:26:04
You have defined R32 twice. I think you mean "E32"
khim
2012/10/05 08:22:53
Yes, and done.
|
+ # RBASE: %r15 (used as “base of untrusted world” in NaCl for amd64) |
naclcall_or_nacljmp = |
- # and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi |
+ # This block encodes call and jump instructions of the form: |
+ # 0: 83 e_ e0 and $~0x1f,E32 |
+ # 3: 4_ 01 f_ add RBASE,R32 |
+ # 6: ff e_ jmpq *R32 |
+ #### INSTRUCTION ONE (three bytes) |
+ # and $~0x1f, E32 |
(0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0 |
- # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
+ #### INSTRUCTION TWO (three bytes) |
+ # add RBASE, R32 (0x01 opcode) |
b_0100_11x0 0x01 (0xf8|0xf9|0xfa|0xfb|0xfc|0xfd|0xfe|0xff) |
- # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
+ #### INSTRUCTION THREE.c (two bytes plus optional REX prefix) |
Brad Chen
2012/10/04 17:26:04
three or two bytes?
khim
2012/10/05 08:22:53
Done.
|
+ # callq R32 |
((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) | |
- # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
+ #### INSTRUCTION THREE.j (two bytes plus optional REX prefix) |
Brad Chen
2012/10/04 17:26:04
Shouldn't 'three' be 'two' here too? Looks like 'j
khim
2012/10/05 08:22:53
Replaced "j" with "jmp".
Now it's "INSTRUCTION TH
|
+ # jmpq R32 |
(REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7)))) |
+ # This action first compares register numbers in three component instructions |
Brad Chen
2012/10/04 17:26:04
What do you mean by 'atomic instructions'? That me
khim
2012/10/05 08:22:53
atomic -> component.
|
+ # described above, then it redefines the range of the super-instruction to |
+ # include the preceding sandboxing sequence and invalidates jump targets on |
+ # the interior of the super-instructions and finally clears “the restricted |
+ # register” variable. |
+ # |
+ # “Magic numbers” correspond to the structure of this particular variant of |
+ # the superinstruction. |
Brad Chen
2012/10/04 17:26:04
Please be consistent; use either 'super-instructio
khim
2012/10/05 08:22:53
Done.
|
@{ |
instruction_start -= 6; |
if (RMFromModRM(instruction_start[1]) != |
RMFromModRM(instruction_start[5]) || |
RMFromModRM(instruction_start[1]) != RMFromModRM(*current_position)) |
instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
- BitmapClearBit(valid_targets, (instruction_start - data) + 3); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 6); |
+ MakeJumpTargetInvalid((instruction_start - data) + 3, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 6, valid_targets); |
restricted_register = NO_REG; |
} | |
- # and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi |
+ # This block encodes call and jump instructions of the form: |
+ # 0: 83 e_ e0 and $~0x1f,E32 |
+ # 3: 4_ 03 f_ add RBASE,R32 |
+ # 6: ff e_ jmpq *R32 |
+ #### INSTRUCTION ONE (three bytes) |
+ # and $~0x1f, E32 |
(0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0 |
- # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
+ #### INSTRUCTION TWO (three bytes) |
+ # add RBASE, R32 (0x03 opcode) |
b_0100_10x1 0x03 (0xc7|0xcf|0xd7|0xdf|0xe7|0xef|0xf7|0xff) |
- # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
+ #### INSTRUCTION THREE.c (two bytes plus optional REX prefix) |
Brad Chen
2012/10/04 17:26:04
Here again I think you mean 'two' instead of 'thre
khim
2012/10/05 08:22:53
Done.
|
+ # callq R32 |
((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) | |
- # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
+ #### INSTRUCTION THREE.j (two bytes plus optional REX prefix) |
+ # jmpq R32 |
(REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7)))) |
+ # This action first compares register numbers in three component instructions |
+ # described above, then it redefines the range of the super-instruction to |
+ # include the preceding sandboxing sequence and invalidates jump targets on |
+ # the interior of the super-instructions and finally clears “the restricted |
+ # register” variable. |
+ # |
+ # “Magic numbers” correspond to the structure of this particular variant of |
+ # the superinstruction. |
Brad Chen
2012/10/04 17:26:04
This action is identical to the previous action. P
khim
2012/10/05 08:22:53
All actions are different: where actions are ident
|
@{ |
instruction_start -= 6; |
if (RMFromModRM(instruction_start[1]) != |
RegFromModRM(instruction_start[5]) || |
RMFromModRM(instruction_start[1]) != RMFromModRM(*current_position)) |
instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
- BitmapClearBit(valid_targets, (instruction_start - data) + 3); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 6); |
+ MakeJumpTargetInvalid((instruction_start - data) + 3, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 6, valid_targets); |
restricted_register = NO_REG; |
} | |
- # rex.R?X? and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi |
+ # This block encodes call and jump instructions of the form: |
+ # 0: 4_ 83 e_ e0 and $~0x1f,E32 |
+ # 4: 4_ 01 f_ add RBASE,R32 |
+ # 7: ff e_ jmpq *R32 |
+ #### INSTRUCTION ONE (four bytes) |
+ # and $~0x1f, E32 |
((REX_RX 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0 |
- # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
+ #### INSTRUCTION TWO (three bytes) |
+ # add RBASE, R32 (0x01 opcode) |
b_0100_11x0 0x01 (0xf8|0xf9|0xfa|0xfb|0xfc|0xfd|0xfe|0xff) |
- # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
+ #### INSTRUCTION THREE.c (two bytes plus optional REX prefix) |
+ # callq R32 |
((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) | |
- # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
+ # jmpq R32 |
(REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7)))) | |
- # and $~0x1f, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d |
+ # This block encodes call and jump instructions of the form: |
+ # 0: 4_ 83 e_ e0 and $~0x1f,E64 |
+ # 4: 4_ 01 f_ add RBASE,R64 |
+ # 7: 4_ ff e_ jmpq *R64 |
+ #### INSTRUCTION ONE (four bytes) |
+ # and $~0x1f, E64 |
(b_0100_0xx1 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6) 0xe0 |
- # add %r15, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d |
+ #### INSTRUCTION TWO (three bytes) |
+ # add RBASE, R64 (0x01 opcode) |
b_0100_11x1 0x01 (0xf8|0xf9|0xfa|0xfb|0xfc|0xfd|0xfe) |
- # callq %r8/%r9/%r10/%r11/%r12/%r13/%r14 |
+ #### INSTRUCTION THREE.c (three bytes) |
+ # callq R64 |
((b_0100_xxx1 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6)) | |
- # jmpq %r8/%r9/%r10/%r11/%r12/%r13/%r14 |
+ #### INSTRUCTION THREE.j (three bytes) |
+ # jmpq R64 |
(b_0100_xxx1 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6))))) |
+ # This action first compares register numbers in three component instructions |
+ # described above, then it redefines the range of the super-instruction to |
+ # include the preceding sandboxing sequence and invalidates jump targets on |
+ # the interior of the super-instructions and finally clears “the restricted |
+ # register” variable. |
+ # |
+ # “Magic numbers” correspond to the structure of this particular variant of |
+ # the superinstruction (we group actions with identical “magic numbers”). |
@{ |
Brad Chen
2012/10/04 17:26:04
This is much too repetitive. You are increasing my
khim
2012/10/05 08:22:53
You can not make the description of the instructio
|
instruction_start -= 7; |
if (RMFromModRM(instruction_start[2]) != |
RMFromModRM(instruction_start[6]) || |
RMFromModRM(instruction_start[2]) != RMFromModRM(*current_position)) |
instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
- BitmapClearBit(valid_targets, (instruction_start - data) + 4); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 7); |
+ MakeJumpTargetInvalid((instruction_start - data) + 4, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 7, valid_targets); |
restricted_register = NO_REG; |
} | |
- # rex.R?X? and $~0x1f, %eax/%ecx/%edx/%ebx/%esp/%ebp/%esi/%edi |
+ # This block encodes call and jump instructions of the form: |
Brad Chen
2012/10/04 17:26:04
Here I think you mean "super-instruction" not "ins
khim
2012/10/05 08:22:53
Done.
|
+ # 0: 4_ 83 e_ e0 and $~0x1f,E32 |
+ # 4: 4_ 03 f_ add RBASE,R32 |
+ # 7: ff e_ jmpq *R32 |
+ #### INSTRUCTION ONE (four bytes) |
+ # and $~0x1f, E32 |
((REX_RX 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7) 0xe0 |
- # add %r15,%rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
+ #### INSTRUCTION TWO (three bytes) |
+ # add RBASE, R32 (0x03 opcode) |
b_0100_10x1 0x03 (0xc7|0xcf|0xd7|0xdf|0xe7|0xef|0xf7|0xff) |
- # callq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
+ #### INSTRUCTION THREE.c (two bytes plus optional REX prefix) |
Brad Chen
2012/10/04 17:26:04
'two'?
khim
2012/10/05 08:22:53
Done.
|
+ # callq R32 |
((REX_WRX? 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6|0xd7)) | |
- # jmpq %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi |
+ # jmpq R32 |
(REX_WRX? 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6|0xe7)))) | |
Brad Chen
2012/10/04 17:26:04
It's really hard to figure out what parenthetical
khim
2012/10/05 08:22:53
Indeed: indentation was mixed up, sorry.
Fixed.
|
- # and $~0x1f, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d |
+ # This block encodes call and jump instructions of the form: |
+ # 0: 4_ 83 e_ e0 and $~0x1f,E64 |
+ # 4: 4_ 03 f_ add RBASE,R64 |
+ # 7: 4_ ff e_ jmpq *R64 |
+ #### INSTRUCTION ONE (four bytes) |
+ # and $~0x1f, E64 |
(b_0100_0xx1 0x83 (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6) 0xe0 |
- # add %r15, %r8d/%r9d/%r10d/%r11d/%r12d/%r13d/%r14d |
+ #### INSTRUCTION TWO (three bytes) |
+ # add RBASE, R64 (0x03 opcode) |
b_0100_11x1 0x03 (0xc7|0xcf|0xd7|0xdf|0xe7|0xef|0xf7) |
- # callq %r8/%r9/%r10/%r11/%r12/%r13/%r14 |
+ #### INSTRUCTION THREE.c (three bytes) |
+ # callq R64 |
((b_0100_xxx1 0xff (0xd0|0xd1|0xd2|0xd3|0xd4|0xd5|0xd6)) | |
- # jmpq %r8/%r9/%r10/%r11/%r12/%r13/%r14 |
+ #### INSTRUCTION THREE.j (three bytes) |
+ # jmpq R64 |
(b_0100_xxx1 0xff (0xe0|0xe1|0xe2|0xe3|0xe4|0xe5|0xe6))))) |
+ # This action first compares register numbers in three component instructions |
Brad Chen
2012/10/04 17:26:04
Every place you find yourself repeating a comment
khim
2012/10/05 08:22:53
Done.
|
+ # described above, then it redefines the range of the super-instruction to |
+ # include the preceding sandboxing sequence and invalidates jump targets on |
+ # the interior of the super-instructions and finally clears “the restricted |
+ # register” variable. |
+ # |
+ # “Magic numbers” correspond to the structure of this particular variant of |
+ # the superinstruction (we group actions with identical “magic numbers”). |
@{ |
instruction_start -= 7; |
if (RMFromModRM(instruction_start[2]) != |
RegFromModRM(instruction_start[6]) || |
RMFromModRM(instruction_start[2]) != RMFromModRM(*current_position)) |
instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
- BitmapClearBit(valid_targets, (instruction_start - data) + 4); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 7); |
+ MakeJumpTargetInvalid((instruction_start - data) + 4, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 7, valid_targets); |
restricted_register = NO_REG; |
}; |
@@ -289,62 +398,122 @@ |
data16rep 0xa5 | # movsw %es:(%rdi),%ds:(%rsi) |
rep? REXW_NONE? 0xa5 ; # movs[lq] %es:(%rdi),%ds:(%rsi) |
+ # Superinstruction which handles instructions which require sandboxed %rsi. |
+ # |
+ # There are two variants which handle spurious REX prefixes. |
Brad Chen
2012/10/04 17:26:04
In what sense are the REX prefixes spurious? This
khim
2012/10/05 08:22:53
They don't change the meaning of the instruction b
|
+ # |
+ # Note that both “0x89 0xf6” and “0x8b 0xf6” encode “mov %esi,%esi”: |
+ # “mov” with opcode “0x89” moves from “A” to “B” while “mov” with opcode |
+ # “0x8b” moves from “B” to “A” but when “A” and “B” happen to denote the |
+ # same register there is no functional difference between these opcodes. |
sandbox_instruction_rsi_no_rdi = |
(0x89 | 0x8b) 0xf6 . # mov %esi,%esi |
Brad Chen
2012/10/04 17:26:04
What does '.' mean here? How is it different from
khim
2012/10/05 08:22:53
"." means concatenation which is default action, t
|
0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi |
string_instruction_rsi_no_rdi |
+ # This action redefines the range of the super-instruction to include the |
Brad Chen
2012/10/04 17:26:04
This same identical comment is repeated at least f
khim
2012/10/05 08:22:53
Done.
|
+ # preceding sandboxing sequence then invalidates jump targets on the |
+ # interior of the super-instructions and finally clears “the restricted |
+ # register” variable. |
+ # |
+ # “Magic numbers” correspond to the structure of this particular variant of |
+ # the superinstruction. |
@{ |
instruction_start -= 6; |
- BitmapClearBit(valid_targets, (instruction_start - data) + 2); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 6); |
+ MakeJumpTargetInvalid((instruction_start - data) + 2, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 6, valid_targets); |
restricted_register = NO_REG; |
} | |
REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi |
0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi |
string_instruction_rsi_no_rdi |
+ # This action redefines the range of the super-instruction to include the |
+ # preceding sandboxing sequence then invalidates jump targets on the |
+ # interior of the super-instructions and finally clears “the restricted |
+ # register” variable. |
+ # |
+ # “Magic numbers” correspond to the structure of this particular variant of |
+ # the superinstruction. |
@{ |
instruction_start -= 7; |
- BitmapClearBit(valid_targets, (instruction_start - data) + 3); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 7); |
+ MakeJumpTargetInvalid((instruction_start - data) + 3, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 7, valid_targets); |
restricted_register = NO_REG; |
}; |
+ # Superinstruction which handles instructions which require sandboxed %rdi. |
+ # |
+ # There are two variants which handle spurious REX prefixes. |
+ # |
+ # Note that both “0x89 0xff” and “0x8b 0xff” encode “mov %edi,%edi”: |
+ # “mov” with opcode “0x89” moves from “A” to “B” while “mov” with opcode |
+ # “0x8b” moves from “B” to “A” but when “A” and “B” happen to denote the |
+ # same register there is no functional difference between these opcodes. |
sandbox_instruction_rdi_no_rsi = |
(0x89 | 0x8b) 0xff . # mov %edi,%edi |
0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
(string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) |
+ # This action redefines the range of the super-instruction to include the |
+ # preceding sandboxing sequence then invalidates jump targets on the |
+ # interior of the super-instructions and finally clears “the restricted |
+ # register” variable. |
+ # |
+ # “Magic numbers” correspond to the structure of this particular variant of |
+ # the superinstruction. |
@{ |
instruction_start -= 6; |
- BitmapClearBit(valid_targets, (instruction_start - data) + 2); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 6); |
+ MakeJumpTargetInvalid((instruction_start - data) + 2, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 6, valid_targets); |
restricted_register = NO_REG; |
} | |
REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
(string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) |
+ # This action redefines the range of the super-instruction to include the |
+ # preceding sandboxing sequence then invalidates jump targets on the |
+ # interior of the super-instructions and finally clears “the restricted |
+ # register” variable. |
+ # |
+ # “Magic numbers” correspond to the structure of this particular variant of |
+ # the superinstruction. |
@{ |
instruction_start -= 7; |
- BitmapClearBit(valid_targets, (instruction_start - data) + 3); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 7); |
+ MakeJumpTargetInvalid((instruction_start - data) + 3, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 7, valid_targets); |
restricted_register = NO_REG; |
}; |
- # String instructions which use both %ds:(%rsi) and %ds:(%rdi) |
+ # Superinstruction which handles instructions which require both sandboxed %rsi |
+ # and sandboxed %rdi. |
+ # |
+ # There are four variants which handle spurious REX prefixes. |
+ # |
+ # Note that both “0x89 0xf6” and “0x8b 0xf6” encode “mov %esi,%esi” while both |
+ # “0x89 0xff” and “0x8b 0xff” encode “mov %edi,%edi”: “mov” with opcode “0x89” |
+ # moves from “A” to “B” while “mov” with opcode “0x8b” moves from “B” to “A” |
+ # but when “A” and “B” happen to denote the same register there is no |
+ # functional difference between these opcodes. |
sandbox_instruction_rsi_rdi = |
(0x89 | 0x8b) 0xf6 . # mov %esi,%esi |
0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi |
(0x89 | 0x8b) 0xff . # mov %edi,%edi |
0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
string_instruction_rsi_rdi |
+ # This action redefines the range of the super-instruction to include the |
+ # preceding sandboxing sequence then invalidates jump targets on the |
+ # interior of the super-instructions and finally clears “the restricted |
+ # register” variable. |
+ # |
+ # “Magic numbers” correspond to the structure of this particular variant of |
+ # the superinstruction. |
@{ |
instruction_start -= 12; |
- BitmapClearBit(valid_targets, (instruction_start - data) + 2); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 6); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 8); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 12); |
+ MakeJumpTargetInvalid((instruction_start - data) + 2, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 6, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 8, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 12, valid_targets); |
restricted_register = NO_REG; |
} | |
@@ -353,12 +522,19 @@ |
REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
string_instruction_rsi_rdi |
+ # This action redefines the range of the super-instruction to include the |
+ # preceding sandboxing sequence then invalidates jump targets on the |
+ # interior of the super-instructions and finally clears “the restricted |
+ # register” variable. |
+ # |
+ # “Magic numbers” correspond to the structure of this particular variant of |
+ # the superinstruction. |
@{ |
instruction_start -= 13; |
- BitmapClearBit(valid_targets, (instruction_start - data) + 2); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 6); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 9); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 13); |
+ MakeJumpTargetInvalid((instruction_start - data) + 2, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 6, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 9, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 13, valid_targets); |
restricted_register = NO_REG; |
} | |
@@ -367,12 +543,19 @@ |
(0x89 | 0x8b) 0xff . # mov %edi,%edi |
0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
string_instruction_rsi_rdi |
+ # This action redefines the range of the super-instruction to include the |
+ # preceding sandboxing sequence then invalidates jump targets on the |
+ # interior of the super-instructions and finally clears “the restricted |
+ # register” variable. |
+ # |
+ # “Magic numbers” correspond to the structure of this particular variant of |
+ # the superinstruction. |
@{ |
instruction_start -= 13; |
- BitmapClearBit(valid_targets, (instruction_start - data) + 3); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 7); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 9); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 13); |
+ MakeJumpTargetInvalid((instruction_start - data) + 3, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 7, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 9, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 13, valid_targets); |
restricted_register = NO_REG; |
} | |
@@ -381,12 +564,19 @@ |
REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
string_instruction_rsi_rdi |
+ # This action redefines the range of the super-instruction to include the |
+ # preceding sandboxing sequence then invalidates jump targets on the |
+ # interior of the super-instructions and finally clears “the restricted |
+ # register” variable. |
+ # |
+ # “Magic numbers” correspond to the structure of this particular variant of |
+ # the superinstruction. |
@{ |
instruction_start -= 14; |
- BitmapClearBit(valid_targets, (instruction_start - data) + 3); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 7); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 10); |
- BitmapClearBit(valid_targets, (instruction_start - data) + 14); |
+ MakeJumpTargetInvalid((instruction_start - data) + 3, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 7, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 10, valid_targets); |
+ MakeJumpTargetInvalid((instruction_start - data) + 14, valid_targets); |
restricted_register = NO_REG; |
}; |
@@ -406,7 +596,10 @@ |
# Remove special instructions which are only allowed in special cases. |
normal_instruction = one_instruction - special_instruction; |
- # Check if call is properly aligned |
+ # Check if call is properly aligned. |
+ # |
+ # For direct call we explicitly encode all variations. For indirect call |
+ # we accept all the special instructions which end with an indirect call. |
call_alignment = |
((normal_instruction & |
# Direct call |
@@ -425,7 +618,7 @@ |
main := ((call_alignment | normal_instruction | special_instruction) |
>{ |
- BitmapSetBit(valid_targets, current_position - data); |
+ MakeJumpTargetValid(current_position - data, valid_targets); |
} |
@{ |
if ((instruction_info_collected & |
@@ -443,6 +636,7 @@ |
instruction_start = current_position + 1; |
instruction_info_collected = 0; |
SET_REX_PREFIX(FALSE); |
+ /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ |
SET_VEX_PREFIX2(0xe0); |
SET_VEX_PREFIX3(0x00); |
operand_states = 0; |
@@ -460,7 +654,7 @@ |
Bool ValidateChunkAMD64(const uint8_t *data, size_t size, |
enum validation_options options, |
const NaClCPUFeaturesX86 *cpu_features, |
- validation_callback_func user_callback, |
+ ValidationCallbackFunc user_callback, |
void *callback_data) { |
bitmap_word valid_targets_small; |
bitmap_word jump_dests_small; |
@@ -507,9 +701,9 @@ |
* 2 bits for register kinds, |
* 5 bits for register numbers (16 regs plus RIZ). */ |
uint32_t operand_states = 0; |
- enum register_name base = NO_REG; |
- enum register_name index = NO_REG; |
- enum register_name restricted_register = NO_REG; |
+ enum OperandName base = NO_REG; |
+ enum OperandName index = NO_REG; |
+ enum OperandName restricted_register = NO_REG; |
uint8_t rex_prefix = FALSE; |
uint8_t vex_prefix2 = 0xe0; |
uint8_t vex_prefix3 = 0x00; |