Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: src/trusted/validator_ragel/unreviewed/validator_x86_64.rl

Issue 11000033: Move validator_x86_XX.rl out of unreviewed. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
5 */
6
7 /*
8 * This is the core of amd64-mode validator. Please note that this file
9 * combines ragel machine description and C language actions. Please read
10 * validator_internals.html first to understand how the whole thing is built:
11 * it explains how the byte sequences are constructed, what constructs like
12 * "@{}" or "REX_WRX?" mean, etc.
13 */
14
15 #include <assert.h>
16 #include <errno.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include "native_client/src/trusted/validator_ragel/bitmap.h"
23 #include "native_client/src/trusted/validator_ragel/unreviewed/validator_interna l.h"
24
25 %%{
26 machine x86_64_validator;
27 alphtype unsigned char;
28 variable p current_position;
29 variable pe end_of_bundle;
30 variable eof end_of_bundle;
31 variable cs current_state;
32
33 include byte_machine "byte_machines.rl";
34
35 include prefix_actions
36 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
37 include prefixes_parsing_noaction
38 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
39 include rex_actions
40 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
41 include rex_parsing
42 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
43 include vex_actions_amd64
44 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
45 include vex_parsing_amd64
46 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
47 include displacement_fields_actions
48 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
49 include displacement_fields_parsing
50 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
51 include modrm_actions_amd64
52 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
53 include modrm_parsing_amd64
54 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
55 include operand_actions_amd64
56 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
57 include immediate_fields_actions
58 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
59 include immediate_fields_parsing_amd64
60 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
61 include relative_fields_validator_actions
62 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
63 include relative_fields_parsing
64 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
65 include cpuid_actions
66 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
67
68 action check_access {
69 CheckAccess(instruction_begin - data, base, index, restricted_register,
70 valid_targets, &instruction_info_collected);
71 }
72
73 # Action which marks last byte as not immediate. Most 3DNow! instructions,
74 # some AVX and XOP instructions have this property. It's referenced by
75 # decode_x86_64 machine in [autogenerated] "validator_x86_64_instruction.rl"
76 # file.
77 action last_byte_is_not_immediate {
78 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE;
79 }
80
81 action modifiable_instruction {
82 instruction_info_collected |= MODIFIABLE_INSTRUCTION;
83 }
84
85 action process_0_operands {
86 Process0Operands(&restricted_register, &instruction_info_collected);
87 }
88 action process_1_operand {
89 Process1Operand(&restricted_register, &instruction_info_collected,
90 rex_prefix, operand_states);
91 }
92 action process_1_operand_zero_extends {
93 Process1OperandZeroExtends(&restricted_register,
94 &instruction_info_collected, rex_prefix,
95 operand_states);
96 }
97 action process_2_operands {
98 Process2Operands(&restricted_register, &instruction_info_collected,
99 rex_prefix, operand_states);
100 }
101 action process_2_operands_zero_extends {
102 Process2OperandsZeroExtends(&restricted_register,
103 &instruction_info_collected, rex_prefix,
104 operand_states);
105 }
106
107 include decode_x86_64 "validator_x86_64_instruction.rl";
108
109 # Special %rbp modifications - the ones which don't need a sandboxing.
110 #
111 # Note that there are two different opcodes for "mov": in x86-64 there are two
112 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
113 # from REG field to RM or in the other direction thus there are two encodings
114 # for the register-to-register move.
115 rbp_modifications =
116 (b_0100_10x0 0x89 0xe5 | # mov %rsp,%rbp
117 b_0100_10x0 0x8b 0xec) # mov %rsp,%rbp
118 @process_0_operands;
119
120 # Special instructions used for %rbp sandboxing.
121 #
122 # This is the "second half" of the %rbp sandboxing. Any zero-extending
123 # instruction which stores the data in %ebp can be first half, but unlike
124 # the situation with other "normal" registers you can not just write to
125 # %ebp and continue: such activity MUST restore the status quo immediately
126 # via one of these instructions.
127 rbp_sandboxing =
128 (b_0100_11x0 0x01 0xfd | # add %r15,%rbp
129 b_0100_10x1 0x03 0xef | # add %r15,%rbp
130 # Note that unlike %rsp case, there is no 'lea (%rbp,%r15,1),%rbp'
131 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp').
132 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp
133 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp
134 # Note: restricted_register keeps the restricted register as explained in
135 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
136 #
137 # "Normal" instructions can not be used in a place where %rbp is restricted.
138 # But since these instructions are "second half" of the %rbp sandboxing they
139 # can be used *only* when %rbp is restricted.
140 #
141 # That is (normal instruction):
142 # mov %eax,%ebp
143 # mov %esi,%edi <- Error: %ebp is restricted
144 # vs
145 # mov %esi,%edi
146 # add %r15,%rbp <- Error: %ebp is *not* restricted
147 # vs
148 # mov %eax,%ebp
149 # add %r15,%rbp <- Ok: %rbp is restricted as it should be
150 #
151 # Check this precondition and mark the beginning of the instruction as
152 # invalid jump for target.
153 @{ if (restricted_register == REG_RBP)
154 instruction_info_collected |= RESTRICTED_REGISTER_USED;
155 else
156 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;
157 restricted_register = NO_REG;
158 UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
159 };
160
161 # Special %rsp modifications - the ones which don't need a sandboxing.
162 #
163 # Note that there are two different opcodes for "mov": in x86-64 there are two
164 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
165 # from REG field to RM or in the other direction thus there are two encodings
166 # for the register-to-register move.
167 rsp_modifications =
168 (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp
169 b_0100_10x0 0x8b 0xe5 | # mov %rbp,%rsp
170 # Superfluous bits are not supported:
171 # http://code.google.com/p/nativeclient/issues/detail?id=3012
172 b_0100_1000 0x83 0xe4 (0x80 .. 0xff)) # and $XXX,%rsp
173 @process_0_operands;
174
175 # Special instructions used for %rsp sandboxing.
176 #
177 # This is the "second half" of the %rsp sandboxing. Any zero-extending
178 # instruction which stores the data in %esp can be first half, but unlike
179 # the situation with other "normal" registers you can not just write to
180 # %esp and continue: such activity MUST restore the status quo immediately
181 # via one of these instructions.
182 rsp_sandboxing =
183 (b_0100_11x0 0x01 0xfc | # add %r15,%rsp
184 b_0100_10x1 0x03 0xe7 | # add %r15,%rsp
185 # OR can be used as well, see
186 # http://code.google.com/p/nativeclient/issues/detail?id=3070
187 b_0100_11x0 0x09 0xfc | # or %r15,%rsp
188 b_0100_10x1 0x0b 0xe7 | # or %r15,%rsp
189 0x4a 0x8d 0x24 0x3c | # lea (%rsp,%r15,1),%rsp
190 0x4a 0x8d 0x64 0x3c 0x00 | # lea 0x00(%rsp,%r15,1),%rsp
191 0x4a 0x8d 0xa4 0x3c 0x00 0x00 0x00 0x00) # lea 0x00000000(%rsp,%r15,1),%rsp
192 # Note: restricted_register keeps the restricted register as explained in
193 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
194 #
195 # "Normal" instructions can not be used in a place where %rsp is restricted.
196 # But since these instructions are "second half" of the %rsp sandboxing they
197 # can be used *only* when %rsp is restricted.
198 #
199 # That is (normal instruction):
200 # mov %eax,%esp
201 # mov %esi,%edi <- Error: %esp is restricted
202 # vs
203 # mov %esi,%edi
204 # add %r15,%rsp <- Error: %esp is *not* restricted
205 # vs
206 # mov %eax,%esp
207 # add %r15,%rsp <- Ok: %rsp is restricted as it should be
208 #
209 # Check this precondition and mark the beginning of the instruction as
210 # invalid jump for target.
211 @{ if (restricted_register == REG_RSP)
212 instruction_info_collected |= RESTRICTED_REGISTER_USED;
213 else
214 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;
215 restricted_register = NO_REG;
216 UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
217 };
218
219 # naclcall or nacljmp. These are three-instruction indirection-jump sequences.
220 # and $~0x1f, %eXX
221 # add RBASE, %rXX
222 # jmpq *%rXX (or: callq *%rXX)
223 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not
224 # just as part of the naclcall/nacljmp, but also as a standalone instruction).
225 #
226 # This means that when the naclcall_or_nacljmp ragel machine is combined with
227 # "normal_instruction*", the regular action process_1_operand_zero_extends is
228 # triggered when the main ragel machine accepts the "and $~0x1f, %eXX" x86-64
229 # instruction. This action checks if %rbp/%rsp is legally modified, thus
230 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine.
231 #
232 # There are a number of variants present which differ by the REX prefix usage:
233 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq"
234 # or "callq" is the same register and it's much simpler to do if one single
235 # action handles only a fixed number of bytes.
236 #
237 # Additional complication arises because x86-64 contains two different "add"
238 # instructions: with "0x01" and "0x03" opcode. They differ in the direction
239 # used: both can encode "add %src_register, %dst_register", but the first one
240 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M
241 # byte for the dst while last one uses field RM of the ModR/M byte for the src
242 # and field REG of the ModR/M byte for dst. Both should be allowed.
243 #
244 # See AMD/Intel manual for clarification of the "add" instruction encoding.
245 #
246 # REGISTER USAGE ABBREVIATIONS:
247 # E86: legacy ia32 registers (all eight: %eax to %edi)
248 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi)
249 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d)
250 # R64: new amd64 registers (only seven: %r8 to %r14)
251 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64)
252 naclcall_or_nacljmp =
253 # This block encodes call and jump "superinstruction" of the following form:
254 # 0: 83 e_ e0 and $~0x1f,E86
255 # 3: 4_ 01 f_ add RBASE,R86
256 # 6: ff e_ jmpq *R86
257 #### INSTRUCTION ONE (three bytes)
258 # and $~0x1f, E86
259 (0x83 b_11_100_xxx 0xe0
260 #### INSTRUCTION TWO (three bytes)
261 # add RBASE, R86 (0x01 opcode)
262 b_0100_11x0 0x01 b_11_111_xxx
263 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
264 # callq R86
265 ((REX_WRX? 0xff b_11_010_xxx) |
266 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
267 # jmpq R86
268 (REX_WRX? 0xff b_11_100_xxx)))
269 @{
270 ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected,
271 &instruction_begin, current_position,
272 data, valid_targets);
273 } |
274
275 # This block encodes call and jump "superinstruction" of the following form:
276 # 0: 83 e_ e0 and $~0x1f,E86
277 # 3: 4_ 03 _f add RBASE,R86
278 # 6: ff e_ jmpq *R86
279 #### INSTRUCTION ONE (three bytes)
280 # and $~0x1f, E86
281 (0x83 b_11_100_xxx 0xe0
282 #### INSTRUCTION TWO (three bytes)
283 # add RBASE, R86 (0x03 opcode)
284 b_0100_10x1 0x03 b_11_xxx_111
285 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
286 # callq R86
287 ((REX_WRX? 0xff b_11_010_xxx) |
288 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
289 # jmpq R86
290 (REX_WRX? 0xff b_11_100_xxx)))
291 @{
292 ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected,
293 &instruction_begin, current_position,
294 data, valid_targets);
295 } |
296
297 # This block encodes call and jump "superinstruction" of the following form:
298 # 0: 4_ 83 e_ e0 and $~0x1f,E86
299 # 4: 4_ 01 f_ add RBASE,R86
300 # 7: ff e_ jmpq *R86
301 #### INSTRUCTION ONE (four bytes)
302 # and $~0x1f, E86
303 ((REX_RX 0x83 b_11_100_xxx 0xe0
304 #### INSTRUCTION TWO (three bytes)
305 # add RBASE, R86 (0x01 opcode)
306 b_0100_11x0 0x01 b_11_111_xxx
307 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
308 # callq R86
309 ((REX_WRX? 0xff b_11_010_xxx) |
310 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
311 # jmpq R86
312 (REX_WRX? 0xff b_11_100_xxx))) |
313
314 # This block encodes call and jump "superinstruction" of the following form:
315 # 0: 4_ 83 e_ e0 and $~0x1f,E64
316 # 4: 4_ 01 f_ add RBASE,R64
317 # 7: 4_ ff e_ jmpq *R64
318 #### INSTRUCTION ONE (four bytes)
319 # and $~0x1f, E64
320 (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
321 #### INSTRUCTION TWO (three bytes)
322 # add RBASE, R64 (0x01 opcode)
323 b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111)
324 #### INSTRUCTION THREE: call (three bytes)
325 # callq R64
326 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
327 #### INSTRUCTION THREE: jmp (three bytes)
328 # jmpq R64
329 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
330 @{
331 ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected,
332 &instruction_begin, current_position,
333 data, valid_targets);
334 } |
335
336 # This block encodes call and jump "superinstruction" of the following form:
337 # 0: 4_ 83 e_ e0 and $~0x1f,E86
338 # 4: 4_ 03 _f add RBASE,R86
339 # 7: ff e_ jmpq *R86
340 #### INSTRUCTION ONE (four bytes)
341 # and $~0x1f, E86
342 ((REX_RX 0x83 b_11_100_xxx 0xe0
343 #### INSTRUCTION TWO (three bytes)
344 # add RBASE, R86 (0x03 opcode)
345 b_0100_10x1 0x03 b_11_xxx_111
346 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
347 # callq R86
348 ((REX_WRX? 0xff b_11_010_xxx) |
349 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
350 # jmpq R86
351 (REX_WRX? 0xff b_11_100_xxx))) |
352
353 # This block encodes call and jump "superinstruction" of the following form:
354 # 0: 4_ 83 e_ e0 and $~0x1f,E64
355 # 4: 4_ 03 _f add RBASE,R64
356 # 7: 4_ ff e_ jmpq *R64
357 #### INSTRUCTION ONE (four bytes)
358 # and $~0x1f, E64
359 (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
360 #### INSTRUCTION TWO (three bytes)
361 # add RBASE, R64 (0x03 opcode)
362 b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111)
363 #### INSTRUCTION THREE: call (three bytes)
364 # callq R64
365 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
366 #### INSTRUCTION THREE: jmp (three bytes)
367 # jmpq R64
368 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
369 @{
370 ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected,
371 &instruction_begin, current_position,
372 data, valid_targets);
373 };
374
375 # EMMX/SSE2/AVX instructions which have implicit %ds:(%rdi) operand
376 # maskmovq %mmX,%mmY
377 maskmovq =
378 REX_WRXB? (0x0f 0xf7)
379 @CPUFeature_EMMX modrm_registers;
380 # maskmovdqu %xmmX, %xmmY
381 maskmovdqu =
382 0x66 REX_WRXB? (0x0f 0xf7) @not_data16_prefix
383 @CPUFeature_SSE2 modrm_registers;
384 # vmaskmovdqu %xmmX, %xmmY
385 vmaskmovdqu =
386 ((0xc4 (VEX_RB & VEX_map00001) 0x79 @vex_prefix3) |
387 (0xc5 (0x79 | 0xf9) @vex_prefix_short)) 0xf7
388 @CPUFeature_AVX modrm_registers;
389 mmx_sse_rdi_instruction = maskmovq | maskmovdqu | vmaskmovdqu;
390
391 # Temporary fix: for string instructions combination of data16 and rep(ne)
392 # prefixes is disallowed to mimic old validator behavior.
393 # See http://code.google.com/p/nativeclient/issues/detail?id=1950
394
395 # data16rep = (data16 | rep data16 | data16 rep);
396 # data16condrep = (data16 | condrep data16 | data16 condrep);
397 data16rep = data16;
398 data16condrep = data16;
399
400 # String instructions which use only %ds:(%rsi)
401 string_instruction_rsi_no_rdi =
402 (rep? 0xac | # lods %ds:(%rsi),%al
403 data16rep 0xad | # lods %ds:(%rsi),%ax
404 rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax
405
406 # String instructions which use only %ds:(%rdi)
407 string_instruction_rdi_no_rsi =
408 condrep? 0xae | # scas %es:(%rdi),%al
409 data16condrep 0xaf | # scas %es:(%rdi),%ax
410 condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax
411
412 rep? 0xaa | # stos %al,%es:(%rdi)
413 data16rep 0xab | # stos %ax,%es:(%rdi)
414 rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi)
415
416 # String instructions which use both %ds:(%rsi) and %es:(%rdi)
417 string_instruction_rsi_rdi =
418 condrep? 0xa6 | # cmpsb %es:(%rdi),%ds:(%rsi)
419 data16condrep 0xa7 | # cmpsw %es:(%rdi),%ds:(%rsi)
420 condrep? REXW_NONE? 0xa7 | # cmps[lq] %es:(%rdi),%ds:(%rsi)
421
422 rep? 0xa4 | # movsb %ds:(%rsi),%es:(%rdi)
423 data16rep 0xa5 | # movsw %ds:(%rsi),%es:(%rdi)
424 rep? REXW_NONE? 0xa5; # movs[lq] %ds:(%rsi),%es:(%rdi)
425
426 # "Superinstruction" which includes %rsi sandboxing.
427 #
428 # There are two variants which handle spurious REX prefixes.
429 #
430 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi": in x86-64
431 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
432 # be used to move from REG field to RM or in the other direction thus there
433 # are two encodings for the register-to-register move (and since REG and RM
434 # are identical here only opcode differs).
435 sandbox_instruction_rsi_no_rdi =
436 (0x89 | 0x8b) 0xf6 # mov %esi,%esi
437 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
438 string_instruction_rsi_no_rdi
439 @{
440 ExpandSuperinstructionBySandboxingBytes(
441 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
442 } |
443
444 REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
445 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
446 string_instruction_rsi_no_rdi
447 @{
448 ExpandSuperinstructionBySandboxingBytes(
449 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
450 };
451
452 # "Superinstruction" which includes %rdi sandboxing.
453 #
454 # There are two variants which handle spurious REX prefixes.
455 #
456 # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64
457 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
458 # be used to move from REG field to RM or in the other direction thus there
459 # are two encodings for the register-to-register move (and since REG and RM
460 # are identical here only opcode differs).
461 sandbox_instruction_rdi_no_rsi =
462 (0x89 | 0x8b) 0xff # mov %edi,%edi
463 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
464 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
465 @{
466 ExpandSuperinstructionBySandboxingBytes(
467 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
468 } |
469
470 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
471 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
472 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
473 @{
474 ExpandSuperinstructionBySandboxingBytes(
475 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
476 };
477
478
479 # "Superinstruction" which includes both %rsi and %rdi sandboxing.
480 #
481 # There are four variants which handle spurious REX prefixes.
482 #
483 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi" while both
484 # "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 there are two
485 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
486 # from REG field to RM or in the other direction thus there are two encodings
487 # for the register-to-register move (and since REG and RM are identical here
488 # only opcode differs).
489 sandbox_instruction_rsi_rdi =
490 (0x89 | 0x8b) 0xf6 # mov %esi,%esi
491 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
492 (0x89 | 0x8b) 0xff # mov %edi,%edi
493 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
494 string_instruction_rsi_rdi
495 @{
496 ExpandSuperinstructionBySandboxingBytes(
497 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */,
498 &instruction_begin, data, valid_targets);
499 } |
500
501 (((0x89 | 0x8b) 0xf6 # mov %esi,%esi
502 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
503 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
504 0x49 0x8d 0x3c 0x3f) | # lea (%r15,%rdi,1),%rdi
505
506 (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
507 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
508 (0x89 | 0x8b) 0xff # mov %edi,%edi
509 0x49 0x8d 0x3c 0x3f)) # lea (%r15,%rdi,1),%rdi
510 string_instruction_rsi_rdi
511 @{
512 ExpandSuperinstructionBySandboxingBytes(
513 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */
514 /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */,
515 &instruction_begin, data, valid_targets);
516 } |
517
518 REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
519 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi
520 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
521 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
522 string_instruction_rsi_rdi
523 @{
524 ExpandSuperinstructionBySandboxingBytes(
525 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */,
526 &instruction_begin, data, valid_targets);
527 };
528
529 # All the "special" instructions (== instructions which obey non-standard
530 # rules). Three groups:
531 # * %rbp/%rsp related instructions (these instructions are special because
532 # they must be in the range %r15...%r15+4294967295 except momentarily they
533 # can be in the range 0...4294967295)
534 # * string instructions (which can not use %r15 as base and thus need special
535 # handling both in compiler and validator)
536 # * naclcall/nacljmp (indirect jumps need special care)
537 special_instruction =
538 (rbp_modifications |
539 rsp_modifications |
540 rbp_sandboxing |
541 rsp_sandboxing |
542 sandbox_instruction_rsi_no_rdi |
543 sandbox_instruction_rdi_no_rsi |
544 sandbox_instruction_rsi_rdi |
545 naclcall_or_nacljmp)
546 # Mark the instruction as special - currently this information is used only
547 # in tests, but in the future we may use it for dynamic code modification
548 # support.
549 @{
550 instruction_info_collected |= SPECIAL_INSTRUCTION;
551 };
552
553 # Remove special instructions which are only allowed in special cases.
554 normal_instruction = one_instruction - special_instruction;
555
556 # Check if call is properly aligned.
557 #
558 # For direct call we explicitly encode all variations. For indirect call
559 # we accept all the special instructions which end with indirect call.
560 call_alignment =
561 ((normal_instruction &
562 # Direct call
563 ((data16 REX_RXB? 0xe8 rel16) |
564 (REX_WRXB? 0xe8 rel32) |
565 (data16 REXW_RXB 0xe8 rel32))) |
566 (special_instruction &
567 # Indirect call
568 (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* &
569 (modrm_memory | modrm_registers)))))
570 # Call instruction must be aligned to the end of bundle. Previously this was
571 # a strict requirement, today it's just a warning to aid with debugging.
572 @{
573 if (((current_position - data) & kBundleMask) != kBundleMask)
574 instruction_info_collected |= BAD_CALL_ALIGNMENT;
575 };
576
577 # This action calls user's callback (if needed) and cleans up validator's
578 # internal state.
579 #
580 # We call the user callback if there are validation errors or if the
581 # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used.
582 #
583 # After that we move instruction_begin and clean all the variables which
584 # are only used in the processing of a single instruction (prefixes, operand
585 # states and instruction_info_collected).
586 action end_of_instruction_cleanup {
587 /* Call user-supplied callback. */
588 instruction_end = current_position + 1;
589 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) ||
590 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) {
591 result &= user_callback(
592 instruction_begin, instruction_end,
593 instruction_info_collected |
594 ((restricted_register << RESTRICTED_REGISTER_SHIFT) &
595 RESTRICTED_REGISTER_MASK), callback_data);
596 }
597
598 /* On successful match the instruction_begin must point to the next byte
599 * to be able to report the new offset as the start of instruction
600 * causing error. */
601 instruction_begin = instruction_end;
602
603 /* Mark start of the next instruction as a valid target for jump.
604 * Note: we mark start of the next instruction here, not start of the
605 * current one because memory access check should be able to clear this
606 * bit when restricted register is used. */
607 MarkValidJumpTarget(instruction_begin - data, valid_targets);
608
609 /* Clear variables. */
610 instruction_info_collected = 0;
611 SET_REX_PREFIX(FALSE);
612 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
613 SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B);
614 SET_VEX_PREFIX3(0x00);
615 operand_states = 0;
616 base = 0;
617 index = 0;
618 }
619
620 # This action reports fatal error detected by DFA.
621 action report_fatal_error {
622 result &= user_callback(instruction_begin, current_position,
623 UNRECOGNIZED_INSTRUCTION, callback_data);
624 /*
625 * Process the next bundle: "continue" here is for the "for" cycle in
626 * the ValidateChunkAMD64 function.
627 *
628 * It does not affect the case which we really care about (when code
629 * is validatable), but makes it possible to detect more errors in one
630 * run in tools like ncval.
631 */
632 continue;
633 }
634
635 # This is main ragel machine: it does 99% of validation work. There is only
636 # one thing to do with bundle if this machine accepts the bundle:
637 # * check for the state of the restricted_register at the end of the bundle.
638 # It's an error if %rbp or %rsp is restricted at the end of the bundle.
639 # Additionally if all the bundles are fine you need to check that direct jumps
640 # are correct. This is done in the following way:
641 # * DFA fills two arrays: valid_targets and jump_dests.
642 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0".
643 # All other checks are done here.
644
645 main := ((call_alignment | normal_instruction | special_instruction)
646 @end_of_instruction_cleanup)*
647 $!report_fatal_error;
648
649 }%%
650
651 %% write data;
652
/*
 * Sandboxing class of an operand. It is stored in operand_states together
 * with the operand's register name (see the SET_OPERAND_* macros below).
 */
enum OperandKind {
  OPERAND_SANDBOX_IRRELEVANT = 0,
  /*
   * 8bit operands must be distinguished from other types because the REX prefix
   * regulates the choice between %ah and %spl, as well as %ch and %bpl.
   */
  OPERAND_SANDBOX_8BIT,
  OPERAND_SANDBOX_RESTRICTED,
  /*
   * Currently 16bit modifications are not distinguished from 64bit ones: both
   * are recorded as OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the
   * old validator.
   */
  OPERAND_SANDBOX_UNRESTRICTED
};

/*
 * Layout of operand_states: operand N occupies byte N.  The low five bits of
 * that byte hold the operand name (S), the next two bits (5 and 6) hold the
 * enum OperandKind value.
 *
 * All macros below expect a 32-bit unsigned variable called "operand_states"
 * to be in scope at the point of use.
 */

/* Record register name S for operand N. */
#define SET_OPERAND_NAME(N, S) (operand_states |= ((S) << ((N) << 3)))
/* Record the sandboxing kind for operand N from its size tag T. */
#define SET_OPERAND_TYPE(N, T) SET_OPERAND_TYPE_ ## T(N)
#define SET_OPERAND_TYPE_OPERAND_TYPE_8_BIT(N) \
  (operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((N) << 3)))
#define SET_OPERAND_TYPE_OPERAND_TYPE_16_BIT(N) \
  (operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)))
#define SET_OPERAND_TYPE_OPERAND_TYPE_32_BIT(N) \
  (operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3)))
#define SET_OPERAND_TYPE_OPERAND_TYPE_64_BIT(N) \
  (operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)))
/*
 * True when operand N is exactly register S with sandboxing kind T.
 *
 * The byte mask is unsigned: "0xff << 24" on a plain int overflows, which is
 * undefined behavior.  S and T are parenthesized so that expression arguments
 * are combined as a whole.
 */
#define CHECK_OPERAND(N, S, T) \
  ((operand_states & (0xffu << ((N) << 3))) == \
   ((unsigned) (((S) | ((T) << 5)) << ((N) << 3))))
679
680 static INLINE void CheckAccess(ptrdiff_t instruction_begin,
681 enum OperandName base,
682 enum OperandName index,
683 uint8_t restricted_register,
684 bitmap_word *valid_targets,
685 uint32_t *instruction_info_collected) {
686 if ((base == REG_RIP) || (base == REG_R15) ||
687 (base == REG_RSP) || (base == REG_RBP)) {
688 if ((index == NO_REG) || (index == REG_RIZ))
689 { /* do nothing. */ }
690 else if (index == restricted_register)
691 BitmapClearBit(valid_targets, instruction_begin),
692 *instruction_info_collected |= RESTRICTED_REGISTER_USED;
693 else
694 *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER;
695 } else {
696 *instruction_info_collected |= FORBIDDEN_BASE_REGISTER;
697 }
698 }
699
700
701 static INLINE void Process0Operands(enum OperandName *restricted_register,
702 uint32_t *instruction_info_collected) {
703 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
704 * instruction, not with regular instruction. */
705 if (*restricted_register == REG_RSP) {
706 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
707 } else if (*restricted_register == REG_RBP) {
708 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
709 }
710 *restricted_register = NO_REG;
711 }
712
713 static INLINE void Process1Operand(enum OperandName *restricted_register,
714 uint32_t *instruction_info_collected,
715 uint8_t rex_prefix,
716 uint32_t operand_states) {
717 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
718 * instruction, not with regular instruction. */
719 if (*restricted_register == REG_RSP) {
720 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
721 } else if (*restricted_register == REG_RBP) {
722 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
723 }
724 *restricted_register = NO_REG;
725 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
726 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
727 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
728 *instruction_info_collected |= R15_MODIFIED;
729 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
730 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
731 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
732 *instruction_info_collected |= BPL_MODIFIED;
733 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
734 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
735 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
736 *instruction_info_collected |= SPL_MODIFIED;
737 }
738 }
739
740 static INLINE void Process1OperandZeroExtends(
741 enum OperandName *restricted_register,
742 uint32_t *instruction_info_collected,
743 uint8_t rex_prefix,
744 uint32_t operand_states) {
745 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
746 * instruction, not with regular instruction. */
747 if (*restricted_register == REG_RSP) {
748 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
749 } else if (*restricted_register == REG_RBP) {
750 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
751 }
752 *restricted_register = NO_REG;
753 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
754 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
755 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
756 *instruction_info_collected |= R15_MODIFIED;
757 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
758 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
759 *instruction_info_collected |= BPL_MODIFIED;
760 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
761 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
762 *instruction_info_collected |= SPL_MODIFIED;
763 /* Take 2 bits of operand type from operand_states as *restricted_register,
764 * make sure operand_states denotes a register (4th bit == 0). */
765 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) {
766 *restricted_register = operand_states & 0x0f;
767 }
768 }
769
770 static INLINE void Process2Operands(enum OperandName *restricted_register,
771 uint32_t *instruction_info_collected,
772 uint8_t rex_prefix,
773 uint32_t operand_states) {
774 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
775 * instruction, not with regular instruction. */
776 if (*restricted_register == REG_RSP) {
777 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
778 } else if (*restricted_register == REG_RBP) {
779 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
780 }
781 *restricted_register = NO_REG;
782 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
783 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
784 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
785 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
786 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
787 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
788 *instruction_info_collected |= R15_MODIFIED;
789 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
790 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
791 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) ||
792 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
793 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
794 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
795 *instruction_info_collected |= BPL_MODIFIED;
796 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
797 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
798 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) ||
799 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
800 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
801 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
802 *instruction_info_collected |= SPL_MODIFIED;
803 }
804 }
805
806 static INLINE void Process2OperandsZeroExtends(
807 enum OperandName *restricted_register,
808 uint32_t *instruction_info_collected,
809 uint8_t rex_prefix,
810 uint32_t operand_states) {
811 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
812 * instruction, not with regular instruction. */
813 if (*restricted_register == REG_RSP) {
814 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
815 } else if (*restricted_register == REG_RBP) {
816 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
817 }
818 *restricted_register = NO_REG;
819 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
820 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
821 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
822 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
823 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
824 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
825 *instruction_info_collected |= R15_MODIFIED;
826 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
827 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) ||
828 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
829 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
830 *instruction_info_collected |= BPL_MODIFIED;
831 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
832 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) ||
833 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
834 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
835 *instruction_info_collected |= SPL_MODIFIED;
836 /* Take 2 bits of operand type from operand_states as *restricted_register,
837 * make sure operand_states denotes a register (4th bit == 0). */
838 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) {
839 *restricted_register = operand_states & 0x0f;
840 if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED)) {
841 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
842 } else if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED)) {
843 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
844 }
845 /* Take 2 bits of operand type from operand_states as *restricted_register,
846 * make sure operand_states denotes a register (12th bit == 0). */
847 } else if ((operand_states & 0x7000) == (OPERAND_SANDBOX_RESTRICTED << 13)) {
848 *restricted_register = (operand_states & 0x0f00) >> 8;
849 }
850 }
851
852 /*
853 * This function merges "dangerous" instruction with sandboxing instructions to
854 * get a "superinstruction" and unmarks in-between jump targets.
855 */
856 static INLINE void ExpandSuperinstructionBySandboxingBytes(
857 size_t sandbox_instructions_size,
858 const uint8_t **instruction_begin,
859 const uint8_t *data,
860 bitmap_word *valid_targets) {
861 *instruction_begin -= sandbox_instructions_size;
862 /*
863 * We need to unmark start of the "dangerous" instruction itself, too, but we
864 * don't need to mark the beginning of the whole "superinstruction" - that's
865 * why we move start by one byte and don't change the length.
866 */
867 UnmarkValidJumpTargets((*instruction_begin + 1 - data),
868 sandbox_instructions_size,
869 valid_targets);
870 }
871
872 /*
873 * Return TRUE if naclcall or nacljmp uses the same register in all three
874 * instructions.
875 *
876 * This version is for the case where "add %src_register, %dst_register" with
877 * dst in RM field and src in REG field of ModR/M byte is used.
878 *
879 * There are five possible forms:
880 *
881 * 0: 83 eX e0 and $~0x1f,E86
882 * 3: 4? 01 fX add RBASE,R86
883 * 6: ff eX jmpq *R86
884 * ^ ^
885 * instruction_begin current_position
886 *
887 * 0: 4? 83 eX e0 and $~0x1f,E86
888 * 4: 4? 01 fX add RBASE,R86
889 * 7: ff eX jmpq *R86
890 * ^ ^
891 * instruction_begin current_position
892 *
893 * 0: 83 eX e0 and $~0x1f,E86
894 * 3: 4? 01 fX add RBASE,R86
895 * 6: 4? ff eX jmpq *R86
896 * ^ ^
897 * instruction_begin current_position
898 *
899 * 0: 4? 83 eX e0 and $~0x1f,E86
900 * 4: 4? 01 fX add RBASE,R86
901 * 7: 4? ff eX jmpq *R86
902 * ^ ^
903 * instruction_begin current_position
904 *
905 * 0: 4? 83 eX e0 and $~0x1f,E64
906 * 4: 4? 01 fX add RBASE,R64
907 * 7: 4? ff eX jmpq *R64
908 * ^ ^
909 * instruction_begin current_position
910 *
911 * We don't care about "?" (they are checked by DFA).
912 */
913 static INLINE Bool VerifyNaclCallOrJmpAddToRM(const uint8_t *instruction_begin,
914 const uint8_t *current_position) {
915 return
916 RMFromModRM(instruction_begin[-5]) == RMFromModRM(instruction_begin[-1]) &&
917 RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]);
918 }
919
920 /*
921 * Return TRUE if naclcall or nacljmp uses the same register in all three
922 * instructions.
923 *
924 * This version is for the case where "add %src_register, %dst_register" with
925 * dst in REG field and src in RM field of ModR/M byte is used.
926 *
927 * There are five possible forms:
928 *
929 * 0: 83 eX e0 and $~0x1f,E86
930 * 3: 4? 03 Xf add RBASE,R86
931 * 6: ff eX jmpq *R86
932 * ^ ^
933 * instruction_begin current_position
934 *
935 * 0: 4? 83 eX e0 and $~0x1f,E86
936 * 4: 4? 03 Xf add RBASE,R86
937 * 7: ff eX jmpq *R86
938 * ^ ^
939 * instruction_begin current_position
940 *
941 * 0: 83 eX e0 and $~0x1f,E86
942 * 3: 4? 03 Xf add RBASE,R86
943 * 6: 4? ff eX jmpq *R86
944 * ^ ^
945 * instruction_begin current_position
946 *
947 * 0: 4? 83 eX e0 and $~0x1f,E86
948 * 4: 4? 03 Xf add RBASE,R86
949 * 7: 4? ff eX jmpq *R86
950 * ^ ^
951 * instruction_begin current_position
952 *
953 * 0: 4? 83 eX e0 and $~0x1f,E64
954 * 4: 4? 03 Xf add RBASE,R64
955 * 7: 4? ff eX jmpq *R64
956 * ^ ^
957 * instruction_begin current_position
958 *
959 * We don't care about "?" (they are checked by DFA).
960 */
961 static INLINE Bool VerifyNaclCallOrJmpAddToReg(
962 const uint8_t *instruction_begin,
963 const uint8_t *current_position) {
964 return
965 RMFromModRM(instruction_begin[-5]) == RegFromModRM(instruction_begin[-1]) &&
966 RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]);
967 }
968
969 /*
970 * This function checks that naclcall or nacljmp are correct (that is: three
971 * component instructions match) and if that is true then it merges call or jmp
972 * with a sandboxing to get a "superinstruction" and removes in-between jump
973 * targets. If it's not true then it triggers "unrecognized instruction" error
974 * condition.
975 *
976 * This version is for the case where "add with dst register in RM field"
977 * (opcode 0x01) and "add without REX prefix" is used.
978 *
979 * There are two possibile forms:
980 *
981 * 0: 83 eX e0 and $~0x1f,E86
982 * 3: 4? 01 fX add RBASE,R86
983 * 6: ff eX jmpq *R86
984 * ^ ^
985 * instruction_begin current_position
986 *
987 * 0: 83 eX e0 and $~0x1f,E86
988 * 3: 4? 01 fX add RBASE,R86
989 * 6: 4? ff eX jmpq *R86
990 * ^ ^
991 * instruction_begin current_position
992 */
993 static INLINE void ProcessNaclCallOrJmpAddToRMNoRex(
994 uint32_t *instruction_info_collected,
995 const uint8_t **instruction_begin,
996 const uint8_t *current_position,
997 const uint8_t *data,
998 bitmap_word *valid_targets) {
999 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
1000 ExpandSuperinstructionBySandboxingBytes(
1001 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
1002 else
1003 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1004 }
1005
1006 /*
1007 * This function checks that naclcall or nacljmp are correct (that is: three
1008 * component instructions match) and if that is true then it merges call or jmp
1009 * with a sandboxing to get a "superinstruction" and removes in-between jump
1010 * targets. If it's not true then it triggers "unrecognized instruction" error
1011 * condition.
1012 *
1013 * This version is for the case where "add with dst register in REG field"
1014 * (opcode 0x03) and "add without REX prefix" is used.
1015 *
1016 * There are two possibile forms:
1017 *
1018 * 0: 83 eX e0 and $~0x1f,E86
1019 * 3: 4? 03 Xf add RBASE,R86
1020 * 6: ff eX jmpq *R86
1021 * ^ ^
1022 * instruction_begin current_position
1023 *
1024 * 0: 83 eX e0 and $~0x1f,E86
1025 * 3: 4? 03 Xf add RBASE,R86
1026 * 6: 4? ff eX jmpq *R86
1027 * ^ ^
1028 * instruction_begin current_position
1029 */
1030 static INLINE void ProcessNaclCallOrJmpAddToRegNoRex(
1031 uint32_t *instruction_info_collected,
1032 const uint8_t **instruction_begin,
1033 const uint8_t *current_position,
1034 const uint8_t *data,
1035 bitmap_word *valid_targets) {
1036 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
1037 ExpandSuperinstructionBySandboxingBytes(
1038 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
1039 else
1040 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1041 }
1042
1043 /*
1044 * This function checks that naclcall or nacljmp are correct (that is: three
1045 * component instructions match) and if that is true then it merges call or jmp
1046 * with a sandboxing to get a "superinstruction" and removes in-between jump
1047 * targets. If it's not true then it triggers "unrecognized instruction" error
1048 * condition.
1049 *
1050 * This version is for the case where "add with dst register in RM field"
1051 * (opcode 0x01) and "add without REX prefix" is used.
1052 *
1053 * There are three possibile forms:
1054 *
1055 * 0: 4? 83 eX e0 and $~0x1f,E86
1056 * 4: 4? 01 fX add RBASE,R86
1057 * 7: ff eX jmpq *R86
1058 * ^ ^
1059 * instruction_begin current_position
1060 *
1061 * 0: 4? 83 eX e0 and $~0x1f,E86
1062 * 4: 4? 01 fX add RBASE,R86
1063 * 7: 4? ff eX jmpq *R86
1064 * ^ ^
1065 * instruction_begin current_position
1066 *
1067 * 0: 4? 83 eX e0 and $~0x1f,E64
1068 * 4: 4? 01 fX add RBASE,R64
1069 * 7: 4? ff eX jmpq *R64
1070 * ^ ^
1071 * instruction_begin current_position
1072 */
1073 static INLINE void ProcessNaclCallOrJmpAddToRMWithRex(
1074 uint32_t *instruction_info_collected,
1075 const uint8_t **instruction_begin,
1076 const uint8_t *current_position,
1077 const uint8_t *data,
1078 bitmap_word *valid_targets) {
1079 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
1080 ExpandSuperinstructionBySandboxingBytes(
1081 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
1082 else
1083 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1084 }
1085
1086 /*
1087 * This function checks that naclcall or nacljmp are correct (that is: three
1088 * component instructions match) and if that is true then it merges call or jmp
1089 * with a sandboxing to get a "superinstruction" and removes in-between jump
1090 * targets. If it's not true then it triggers "unrecognized instruction" error
1091 * condition.
1092 *
1093 * This version is for the case where "add with dst register in REG field"
1094 * (opcode 0x03) and "add without REX prefix" is used.
1095 *
1096 * There are three possibile forms:
1097 *
1098 * 0: 4? 83 eX e0 and $~0x1f,E86
1099 * 4: 4? 03 Xf add RBASE,R86
1100 * 7: ff eX jmpq *R86
1101 * ^ ^
1102 * instruction_begin current_position
1103 *
1104 * 0: 4? 83 eX e0 and $~0x1f,E86
1105 * 4: 4? 03 Xf add RBASE,R86
1106 * 7: 4? ff eX jmpq *R86
1107 * ^ ^
1108 * instruction_begin current_position
1109 *
1110 * 0: 4? 83 eX e0 and $~0x1f,E64
1111 * 4: 4? 03 Xf add RBASE,R64
1112 * 7: 4? ff eX jmpq *R64
1113 * ^ ^
1114 * instruction_begin current_position
1115 */
1116 static INLINE void ProcessNaclCallOrJmpAddToRegWithRex(
1117 uint32_t *instruction_info_collected,
1118 const uint8_t **instruction_begin,
1119 const uint8_t *current_position,
1120 const uint8_t *data,
1121 bitmap_word *valid_targets) {
1122 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
1123 ExpandSuperinstructionBySandboxingBytes(
1124 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
1125 else
1126 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1127 }
1128
1129
/*
 * Validates a chunk of amd64 code.
 *
 * data, size:     the code to validate; size must be a multiple of
 *                 kBundleSize (enforced with CHECK).
 * options:        bit flags; PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM and the
 *                 initial restricted register are read from it here.
 * cpu_features:   not referenced in the statements visible in this function
 *                 (presumably consumed by the Ragel actions - confirm).
 * user_callback:  called with callback_data to report problems; its return
 *                 value is AND-ed into the overall result.
 *
 * Returns the accumulated result (starts as TRUE).  Returns FALSE with
 * errno == ENOMEM if the bitmaps cannot be allocated, and sets
 * errno = EINVAL whenever the final result is false.
 */
1130 Bool ValidateChunkAMD64(const uint8_t *data, size_t size,
1131 uint32_t options,
1132 const NaClCPUFeaturesX86 *cpu_features,
1133 ValidationCallbackFunc user_callback,
1134 void *callback_data) {
1135 bitmap_word valid_targets_small;
1136 bitmap_word jump_dests_small;
1137 bitmap_word *valid_targets;
1138 bitmap_word *jump_dests;
1139 const uint8_t *current_position;
1140 const uint8_t *end_of_bundle;
1141 int result = TRUE;
1142
1143 CHECK(sizeof valid_targets_small == sizeof jump_dests_small);
1144 CHECK(size % kBundleSize == 0);
1145
1146 /*
1147 * For very small sequences (one bundle) malloc is too expensive, so a
1147 * single on-stack bitmap_word is used for each bitmap instead.
1148 *
1149 * Note1: we allocate one extra bit, because we set valid jump target bits
1150 * _after_ instructions, so there will be one at the end of the chunk.
1151 *
1152 * Note2: we don't ever mark first bit as a valid jump target but this is
1153 * not a problem because any aligned address is valid jump target.
1154 */
1155 if ((size + 1) <= (sizeof valid_targets_small * 8)) {
1156 valid_targets_small = 0;
1157 valid_targets = &valid_targets_small;
1158 jump_dests_small = 0;
1159 jump_dests = &jump_dests_small;
1160 } else {
1161 valid_targets = BitmapAllocate(size + 1);
1162 jump_dests = BitmapAllocate(size + 1);
1163 if (!valid_targets || !jump_dests) {
/* One of the two may have succeeded; free(NULL) is a no-op for the other. */
1164 free(jump_dests);
1165 free(valid_targets);
1166 errno = ENOMEM;
1167 return FALSE;
1168 }
1169 }
1170
1171 /*
1172 * This option is usually used in tests: we will process the whole chunk
1173 * in one pass. Usually each bundle is processed separately which means
1174 * instructions (and super-instructions) can not cross borders of the bundle.
1175 */
1176 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)
1177 end_of_bundle = data + size;
1178 else
1179 end_of_bundle = data + kBundleSize;
1180
1181 /*
1182 * Main loop. Here we process the data array bundle-after-bundle.
1183 * Ragel-produced DFA does all the checks with one exception: direct jumps.
1184 * It collects the two arrays: valid_targets and jump_dests which are used
1185 * to test direct jumps later.
1186 */
1187 for (current_position = data;
1188 current_position < data + size;
1189 current_position = end_of_bundle,
1190 end_of_bundle = current_position + kBundleSize) {
1191 /* Start of the instruction being processed. */
1192 const uint8_t *instruction_begin = current_position;
1193 /* Only used locally in the end_of_instruction_cleanup action. */
1194 const uint8_t *instruction_end;
1195 int current_state;
1196 uint32_t instruction_info_collected = 0;
1197 /* Keeps one byte of information per operand in the current instruction:
1198 * 2 bits for register kinds,
1199 * 5 bits for register numbers (16 regs plus RIZ). */
1200 uint32_t operand_states = 0;
1201 enum OperandName base = NO_REG;
1202 enum OperandName index = NO_REG;
1203 enum OperandName restricted_register =
1204 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options);
1205 uint8_t rex_prefix = FALSE;
1206 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
1207 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B;
1208 uint8_t vex_prefix3 = 0x00;
1209
/*
 * Ragel directives: the generated DFA initialisation and execution code is
 * emitted here.  Per the "variable" statements at the top of this file the
 * DFA uses current_position as p, end_of_bundle as pe/eof and current_state
 * as cs; the locals declared above must keep their names because the
 * actions refer to them.
 */
1210 %% write init;
1211 %% write exec;
1212
1213 /*
1214 * Ragel DFA accepted the bundle, but we still need to make sure the last
1215 * instruction hasn't left %rbp or %rsp in restricted state.
1216 */
1217 if (restricted_register == REG_RBP)
1218 result &= user_callback(end_of_bundle, end_of_bundle,
1219 RESTRICTED_RBP_UNPROCESSED |
1220 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) &
1221 RESTRICTED_REGISTER_MASK), callback_data);
1222 else if (restricted_register == REG_RSP)
1223 result &= user_callback(end_of_bundle, end_of_bundle,
1224 RESTRICTED_RSP_UNPROCESSED |
1225 ((REG_RSP << RESTRICTED_REGISTER_SHIFT) &
1226 RESTRICTED_REGISTER_MASK), callback_data);
1227 }
1228
1229 /*
1230 * Check the direct jumps. All the targets from jump_dests must be in
1231 * valid_targets.
1232 */
1233 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests,
1234 user_callback, callback_data);
1235
1236 /* We only use malloc for large code sequences; the small bitmaps are locals. */
1237 if (jump_dests != &jump_dests_small) free(jump_dests);
1238 if (valid_targets != &valid_targets_small) free(valid_targets);
/* errno is assigned after the free() calls, so they cannot overwrite it. */
1239 if (!result) errno = EINVAL;
1240 return result;
1241 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698