| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. | |
| 3 * Use of this source code is governed by a BSD-style license that can be | |
| 4 * found in the LICENSE file. | |
| 5 */ | |
| 6 | |
| 7 /* | |
| 8 * This is the core of amd64-mode validator. Please note that this file | |
| 9 * combines ragel machine description and C language actions. Please read | |
| 10 * validator_internals.html first to understand how the whole thing is built: | |
| 11 * it explains how the byte sequences are constructed, what constructs like | |
| 12 * "@{}" or "REX_WRX?" mean, etc. | |
| 13 */ | |
| 14 | |
| 15 #include <assert.h> | |
| 16 #include <errno.h> | |
| 17 #include <stddef.h> | |
| 18 #include <stdio.h> | |
| 19 #include <stdlib.h> | |
| 20 #include <string.h> | |
| 21 | |
| 22 #include "native_client/src/trusted/validator_ragel/bitmap.h" | |
| 23 #include "native_client/src/trusted/validator_ragel/unreviewed/validator_interna
l.h" | |
| 24 | |
| 25 %%{ | |
| 26 machine x86_64_validator; | |
| 27 alphtype unsigned char; | |
| 28 variable p current_position; | |
| 29 variable pe end_of_bundle; | |
| 30 variable eof end_of_bundle; | |
| 31 variable cs current_state; | |
| 32 | |
| 33 include byte_machine "byte_machines.rl"; | |
| 34 | |
| 35 include prefixes_parsing_validator | |
| 36 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 37 include rex_actions | |
| 38 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 39 include rex_parsing | |
| 40 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 41 include vex_actions_amd64 | |
| 42 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 43 include vex_parsing_amd64 | |
| 44 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 45 include displacement_fields_parsing | |
| 46 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 47 include modrm_actions_amd64 | |
| 48 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 49 include modrm_parsing | |
| 50 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 51 include operand_format_actions | |
| 52 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 53 include operand_source_actions_amd64 | |
| 54 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 55 include immediate_fields_parsing | |
| 56 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 57 include relative_fields_validator_actions | |
| 58 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 59 include relative_fields_parsing | |
| 60 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 61 include cpuid_actions | |
| 62 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | |
| 63 | |
| 64 action check_access { | |
| 65 CheckAccess(instruction_begin - data, base, index, restricted_register, | |
| 66 valid_targets, &instruction_info_collected); | |
| 67 } | |
| 68 | |
| 69 # Action which marks last byte as not immediate. Most 3DNow! instructions, | |
| 70 # some AVX and XOP instructions have this property. It's referenced by | |
| 71 # decode_x86_64 machine in [autogenerated] "validator_x86_64_instruction.rl" | |
| 72 # file. | |
| 73 action last_byte_is_not_immediate { | |
| 74 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE; | |
| 75 } | |
| 76 | |
| 77 action modifiable_instruction { | |
| 78 instruction_info_collected |= MODIFIABLE_INSTRUCTION; | |
| 79 } | |
| 80 | |
| 81 action process_0_operands { | |
| 82 Process0Operands(&restricted_register, &instruction_info_collected); | |
| 83 } | |
| 84 action process_1_operand { | |
| 85 Process1Operand(&restricted_register, &instruction_info_collected, | |
| 86 rex_prefix, operand_states); | |
| 87 } | |
| 88 action process_1_operand_zero_extends { | |
| 89 Process1OperandZeroExtends(&restricted_register, | |
| 90 &instruction_info_collected, rex_prefix, | |
| 91 operand_states); | |
| 92 } | |
| 93 action process_2_operands { | |
| 94 Process2Operands(&restricted_register, &instruction_info_collected, | |
| 95 rex_prefix, operand_states); | |
| 96 } | |
| 97 action process_2_operands_zero_extends { | |
| 98 Process2OperandsZeroExtends(&restricted_register, | |
| 99 &instruction_info_collected, rex_prefix, | |
| 100 operand_states); | |
| 101 } | |
| 102 | |
| 103 include decode_x86_64 "validator_x86_64_instruction.rl"; | |
| 104 | |
| 105 # Special %rbp modifications - the ones which don't need a sandboxing. | |
| 106 # | |
| 107 # Note that there are two different opcodes for "mov": in x86-64 there are two | |
| 108 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move | |
| 109 # from REG field to RM or in the other direction thus there are two encodings | |
| 110 # for the register-to-register move. | |
| 111 rbp_modifications = | |
| 112 (b_0100_10x0 0x89 0xe5 | # mov %rsp,%rbp | |
| 113 b_0100_10x0 0x8b 0xec) # mov %rsp,%rbp | |
| 114 @process_0_operands; | |
| 115 | |
| 116 # Special instructions used for %rbp sandboxing. | |
| 117 # | |
| 118 # This is the "second half" of the %rbp sandboxing. Any zero-extending | |
| 119 # instruction which stores the data in %ebp can be first half, but unlike | |
| 120 # the situation with other "normal" registers you can not just write to | |
| 121 # %ebp and continue: such activity MUST restore the status quo immediately | |
| 122 # via one of these instructions. | |
| 123 rbp_sandboxing = | |
| 124 (b_0100_11x0 0x01 0xfd | # add %r15,%rbp | |
| 125 b_0100_10x1 0x03 0xef | # add %r15,%rbp | |
| 126 # Note that unlike %rsp case, there is no 'lea (%rbp,%r15,1),%rbp' | |
| 127 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp'). | |
| 128 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp | |
| 129 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp | |
| 130 # Note: restricted_register keeps the restricted register as explained in | |
| 131 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x8
6-64-systems | |
| 132 # | |
| 133 # "Normal" instructions can not be used in a place where %rbp is restricted. | |
| 134 # But since these instructions are "second half" of the %rbp sandboxing they | |
| 135 # can be used *only* when %rbp is restricted. | |
| 136 # | |
| 137 # That is (normal instruction): | |
| 138 # mov %eax,%ebp | |
| 139 # mov %esi,%edi <- Error: %ebp is restricted | |
| 140 # vs | |
| 141 # mov %esi,%edi | |
| 142 # add %r15,%rbp <- Error: %ebp is *not* restricted | |
| 143 # vs | |
| 144 # mov %eax,%ebp | |
| 145 # add %r15,%rbp <- Ok: %rbp is restricted as it should be | |
| 146 # | |
| 147 # Check this precondition and mark the beginning of the instruction as | |
| 148 # an invalid jump target. | |
| 149 @{ if (restricted_register == REG_RBP) | |
| 150 instruction_info_collected |= RESTRICTED_REGISTER_USED; | |
| 151 else | |
| 152 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; | |
| 153 restricted_register = NO_REG; | |
| 154 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); | |
| 155 }; | |
| 156 | |
| 157 # Special %rsp modifications - the ones which don't need a sandboxing. | |
| 158 # | |
| 159 # Note that there are two different opcodes for "mov": in x86-64 there are two | |
| 160 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move | |
| 161 # from REG field to RM or in the other direction thus there are two encodings | |
| 162 # for the register-to-register move. | |
| 163 rsp_modifications = | |
| 164 (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp | |
| 165 b_0100_10x0 0x8b 0xe5 | # mov %rbp,%rsp | |
| 166 # Superfluous bits are not supported: | |
| 167 # http://code.google.com/p/nativeclient/issues/detail?id=3012 | |
| 168 b_0100_1000 0x83 0xe4 (0x80 .. 0xff)) # and $XXX,%rsp | |
| 169 @process_0_operands; | |
| 170 | |
| 171 # Special instructions used for %rsp sandboxing. | |
| 172 # | |
| 173 # This is the "second half" of the %rsp sandboxing. Any zero-extending | |
| 174 # instruction which stores the data in %esp can be first half, but unlike | |
| 175 # the situation with other "normal" registers you can not just write to | |
| 176 # %esp and continue: such activity MUST restore the status quo immediately | |
| 177 # via one of these instructions. | |
| 178 rsp_sandboxing = | |
| 179 (b_0100_11x0 0x01 0xfc | # add %r15,%rsp | |
| 180 b_0100_10x1 0x03 0xe7 | # add %r15,%rsp | |
| 181 # OR can be used as well, see | |
| 182 # http://code.google.com/p/nativeclient/issues/detail?id=3070 | |
| 183 b_0100_11x0 0x09 0xfc | # or %r15,%rsp | |
| 184 b_0100_10x1 0x0b 0xe7 | # or %r15,%rsp | |
| 185 0x4a 0x8d 0x24 0x3c | # lea (%rsp,%r15,1),%rsp | |
| 186 0x4a 0x8d 0x64 0x3c 0x00 | # lea 0x00(%rsp,%r15,1),%rsp | |
| 187 0x4a 0x8d 0xa4 0x3c 0x00 0x00 0x00 0x00) # lea 0x00000000(%rsp,%r15,1),%rsp | |
| 188 # Note: restricted_register keeps the restricted register as explained in | |
| 189 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x8
6-64-systems | |
| 190 # | |
| 191 # "Normal" instructions can not be used in a place where %rsp is restricted. | |
| 192 # But since these instructions are "second half" of the %rsp sandboxing they | |
| 193 # can be used *only* when %rsp is restricted. | |
| 194 # | |
| 195 # That is (normal instruction): | |
| 196 # mov %eax,%esp | |
| 197 # mov %esi,%edi <- Error: %esp is restricted | |
| 198 # vs | |
| 199 # mov %esi,%edi | |
| 200 # add %r15,%rsp <- Error: %esp is *not* restricted | |
| 201 # vs | |
| 202 # mov %eax,%esp | |
| 203 # add %r15,%rsp <- Ok: %rsp is restricted as it should be | |
| 204 # | |
| 205 # Check this precondition and mark the beginning of the instruction as | |
| 206 # an invalid jump target. | |
| 207 @{ if (restricted_register == REG_RSP) | |
| 208 instruction_info_collected |= RESTRICTED_REGISTER_USED; | |
| 209 else | |
| 210 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; | |
| 211 restricted_register = NO_REG; | |
| 212 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); | |
| 213 }; | |
| 214 | |
| 215 # naclcall or nacljmp. These are three-instruction indirection-jump sequences. | |
| 216 # and $~0x1f, %eXX | |
| 217 # add RBASE, %rXX | |
| 218 # jmpq *%rXX (or: callq *%rXX) | |
| 219 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not | |
| 220 # just as part of the naclcall/nacljmp, but also as a standalone instruction). | |
| 221 # | |
| 222 # This means that when naclcall_or_nacljmp ragel machine will be combined with | |
| 223 # "normal_instruction*" regular action process_1_operand_zero_extends will be | |
| 224 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64 | |
| 225 # instruction. This action will check if %rbp/%rsp is legally modified thus | |
| 226 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine. | |
| 227 # | |
| 228 # There are number of variants present which differ by the REX prefix usage: | |
| 229 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq" | |
| 230 # or "callq" is the same register and it's much simpler to do if one single | |
| 231 # action handles only fixed number of bytes. | |
| 232 # | |
| 233 # Additional complication arises because x86-64 contains two different "add" | |
| 234 # instruction: with "0x01" and "0x03" opcode. They differ in the direction | |
| 235 # used: both can encode "add %src_register, %dst_register", but the first one | |
| 236 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M | |
| 237 # byte for the dst while last one uses field RM of the ModR/M byte for the src | |
| 238 # and field REG of the ModR/M byte for dst. Both should be allowed. | |
| 239 # | |
| 240 # See AMD/Intel manual for clarification "add" instruction encoding. | |
| 241 # | |
| 242 # REGISTER USAGE ABBREVIATIONS: | |
| 243 # E86: legacy ia32 registers (all eight: %eax to %edi) | |
| 244 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi) | |
| 245 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) | |
| 246 # R64: new amd64 registers (only seven: %r8 to %r14) | |
| 247 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64) | |
| 248 naclcall_or_nacljmp = | |
| 249 # This block encodes call and jump "superinstruction" of the following form: | |
| 250 # 0: 83 e_ e0 and $~0x1f,E86 | |
| 251 # 3: 4_ 01 f_ add RBASE,R86 | |
| 252 # 6: ff e_ jmpq *R86 | |
| 253 #### INSTRUCTION ONE (three bytes) | |
| 254 # and $~0x1f, E86 | |
| 255 (0x83 b_11_100_xxx 0xe0 | |
| 256 #### INSTRUCTION TWO (three bytes) | |
| 257 # add RBASE, R86 (0x01 opcode) | |
| 258 b_0100_11x0 0x01 b_11_111_xxx | |
| 259 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) | |
| 260 # callq R86 | |
| 261 ((REX_WRX? 0xff b_11_010_xxx) | | |
| 262 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) | |
| 263 # jmpq R86 | |
| 264 (REX_WRX? 0xff b_11_100_xxx))) | |
| 265 @{ | |
| 266 ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected, | |
| 267 &instruction_begin, current_position, | |
| 268 data, valid_targets); | |
| 269 } | | |
| 270 | |
| 271 # This block encodes call and jump "superinstruction" of the following form: | |
| 272 # 0: 83 e_ e0 and $~0x1f,E86 | |
| 273 # 3: 4_ 03 _f add RBASE,R86 | |
| 274 # 6: ff e_ jmpq *R86 | |
| 275 #### INSTRUCTION ONE (three bytes) | |
| 276 # and $~0x1f, E86 | |
| 277 (0x83 b_11_100_xxx 0xe0 | |
| 278 #### INSTRUCTION TWO (three bytes) | |
| 279 # add RBASE, R86 (0x03 opcode) | |
| 280 b_0100_10x1 0x03 b_11_xxx_111 | |
| 281 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) | |
| 282 # callq R86 | |
| 283 ((REX_WRX? 0xff b_11_010_xxx) | | |
| 284 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) | |
| 285 # jmpq R86 | |
| 286 (REX_WRX? 0xff b_11_100_xxx))) | |
| 287 @{ | |
| 288 ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected, | |
| 289 &instruction_begin, current_position, | |
| 290 data, valid_targets); | |
| 291 } | | |
| 292 | |
| 293 # This block encodes call and jump "superinstruction" of the following form: | |
| 294 # 0: 4_ 83 e_ e0 and $~0x1f,E86 | |
| 295 # 4: 4_ 01 f_ add RBASE,R86 | |
| 296 # 7: ff e_ jmpq *R86 | |
| 297 #### INSTRUCTION ONE (four bytes) | |
| 298 # and $~0x1f, E86 | |
| 299 ((REX_RX 0x83 b_11_100_xxx 0xe0 | |
| 300 #### INSTRUCTION TWO (three bytes) | |
| 301 # add RBASE, R86 (0x01 opcode) | |
| 302 b_0100_11x0 0x01 b_11_111_xxx | |
| 303 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) | |
| 304 # callq R86 | |
| 305 ((REX_WRX? 0xff b_11_010_xxx) | | |
| 306 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) | |
| 307 # jmpq R86 | |
| 308 (REX_WRX? 0xff b_11_100_xxx))) | | |
| 309 | |
| 310 # This block encodes call and jump "superinstruction" of the following form: | |
| 311 # 0: 4_ 83 e_ e0 and $~0x1f,E64 | |
| 312 # 4: 4_ 01 f_ add RBASE,R64 | |
| 313 # 7: 4_ ff e_ jmpq *R64 | |
| 314 #### INSTRUCTION ONE (four bytes) | |
| 315 # and $~0x1f, E64 | |
| 316 (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0 | |
| 317 #### INSTRUCTION TWO (three bytes) | |
| 318 # add RBASE, R64 (0x01 opcode) | |
| 319 b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111) | |
| 320 #### INSTRUCTION THREE: call (three bytes) | |
| 321 # callq R64 | |
| 322 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) | | |
| 323 #### INSTRUCTION THREE: jmp (three bytes) | |
| 324 # jmpq R64 | |
| 325 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) | |
| 326 @{ | |
| 327 ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected, | |
| 328 &instruction_begin, current_position, | |
| 329 data, valid_targets); | |
| 330 } | | |
| 331 | |
| 332 # This block encodes call and jump "superinstruction" of the following form: | |
| 333 # 0: 4_ 83 e_ e0 and $~0x1f,E86 | |
| 334 # 4: 4_ 03 _f add RBASE,R86 | |
| 335 # 7: ff e_ jmpq *R86 | |
| 336 #### INSTRUCTION ONE (four bytes) | |
| 337 # and $~0x1f, E86 | |
| 338 ((REX_RX 0x83 b_11_100_xxx 0xe0 | |
| 339 #### INSTRUCTION TWO (three bytes) | |
| 340 # add RBASE, R86 (0x03 opcode) | |
| 341 b_0100_10x1 0x03 b_11_xxx_111 | |
| 342 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) | |
| 343 # callq R86 | |
| 344 ((REX_WRX? 0xff b_11_010_xxx) | | |
| 345 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) | |
| 346 # jmpq R86 | |
| 347 (REX_WRX? 0xff b_11_100_xxx))) | | |
| 348 | |
| 349 # This block encodes call and jump "superinstruction" of the following form: | |
| 350 # 0: 4_ 83 e_ e0 and $~0x1f,E64 | |
| 351 # 4: 4_ 03 _f add RBASE,R64 | |
| 352 # 7: 4_ ff e_ jmpq *R64 | |
| 353 #### INSTRUCTION ONE (four bytes) | |
| 354 # and $~0x1f, E64 | |
| 355 (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0 | |
| 356 #### INSTRUCTION TWO (three bytes) | |
| 357 # add RBASE, R64 (0x03 opcode) | |
| 358 b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111) | |
| 359 #### INSTRUCTION THREE: call (three bytes) | |
| 360 # callq R64 | |
| 361 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) | | |
| 362 #### INSTRUCTION THREE: jmp (three bytes) | |
| 363 # jmpq R64 | |
| 364 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) | |
| 365 @{ | |
| 366 ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected, | |
| 367 &instruction_begin, current_position, | |
| 368 data, valid_targets); | |
| 369 }; | |
| 370 | |
| 371 # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rdi) operand | |
| 372 | |
| 373 # maskmovq %mmX,%mmY (EMMX or SSE) | |
| 374 maskmovq = REX_WRXB? 0x0f 0xf7 @CPUFeature_EMMXSSE modrm_registers; | |
| 375 | |
| 376 # maskmovdqu %xmmX, %xmmY (SSE2) | |
| 377 maskmovdqu = 0x66 REX_WRXB? 0x0f 0xf7 @CPUFeature_SSE2 modrm_registers; | |
| 378 | |
| 379 # vmaskmovdqu %xmmX, %xmmY (AVX) | |
| 380 vmaskmovdqu = ((0xc4 (VEX_RB & VEX_map00001) b_0_1111_0_01) | | |
| 381 (0xc5 b_X_1111_0_01)) 0xf7 @CPUFeature_AVX modrm_registers; | |
| 382 | |
| 383 mmx_sse_rdi_instruction = maskmovq | maskmovdqu | vmaskmovdqu; | |
| 384 | |
| 385 # Temporary fix: for string instructions combination of data16 and rep(ne) | |
| 386 # prefixes is disallowed to mimic old validator behavior. | |
| 387 # See http://code.google.com/p/nativeclient/issues/detail?id=1950 | |
| 388 | |
| 389 # data16rep = (data16 | rep data16 | data16 rep); | |
| 390 # data16condrep = (data16 | condrep data16 | data16 condrep); | |
| 391 data16rep = data16; | |
| 392 data16condrep = data16; | |
| 393 | |
| 394 # String instructions which use only %ds:(%rsi) | |
| 395 string_instruction_rsi_no_rdi = | |
| 396 (rep? 0xac | # lods %ds:(%rsi),%al | |
| 397 data16rep 0xad | # lods %ds:(%rsi),%ax | |
| 398 rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax | |
| 399 | |
| 400 # String instructions which use only %es:(%rdi) | |
| 401 string_instruction_rdi_no_rsi = | |
| 402 condrep? 0xae | # scas %es:(%rdi),%al | |
| 403 data16condrep 0xaf | # scas %es:(%rdi),%ax | |
| 404 condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax | |
| 405 | |
| 406 rep? 0xaa | # stos %al,%es:(%rdi) | |
| 407 data16rep 0xab | # stos %ax,%es:(%rdi) | |
| 408 rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi) | |
| 409 | |
| 410 # String instructions which use both %ds:(%rsi) and %es:(%rdi) | |
| 411 string_instruction_rsi_rdi = | |
| 412 condrep? 0xa6 | # cmpsb %es:(%rdi),%ds:(%rsi) | |
| 413 data16condrep 0xa7 | # cmpsw %es:(%rdi),%ds:(%rsi) | |
| 414 condrep? REXW_NONE? 0xa7 | # cmps[lq] %es:(%rdi),%ds:(%rsi) | |
| 415 | |
| 416 rep? 0xa4 | # movsb %ds:(%rsi),%es:(%rdi) | |
| 417 data16rep 0xa5 | # movsw %ds:(%rsi),%es:(%rdi) | |
| 418 rep? REXW_NONE? 0xa5; # movs[lq] %ds:(%rsi),%es:(%rdi) | |
| 419 | |
| 420 # "Superinstruction" which includes %rsi sandboxing. | |
| 421 # | |
| 422 # There are two variants which handle spurious REX prefixes. | |
| 423 # | |
| 424 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi": in x86-64 | |
| 425 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may | |
| 426 # be used to move from REG field to RM or in the other direction thus there | |
| 427 # are two encodings for the register-to-register move (and since REG and RM | |
| 428 # are identical here only opcode differs). | |
| 429 sandbox_instruction_rsi_no_rdi = | |
| 430 (0x89 | 0x8b) 0xf6 # mov %esi,%esi | |
| 431 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | |
| 432 string_instruction_rsi_no_rdi | |
| 433 @{ | |
| 434 ExpandSuperinstructionBySandboxingBytes( | |
| 435 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); | |
| 436 } | | |
| 437 | |
| 438 REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi | |
| 439 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | |
| 440 string_instruction_rsi_no_rdi | |
| 441 @{ | |
| 442 ExpandSuperinstructionBySandboxingBytes( | |
| 443 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); | |
| 444 }; | |
| 445 | |
| 446 # "Superinstruction" which includes %rdi sandboxing. | |
| 447 # | |
| 448 # There are two variants which handle spurious REX prefixes. | |
| 449 # | |
| 450 # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 | |
| 451 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may | |
| 452 # be used to move from REG field to RM or in the other direction thus there | |
| 453 # are two encodings for the register-to-register move (and since REG and RM | |
| 454 # are identical here only opcode differs). | |
| 455 sandbox_instruction_rdi_no_rsi = | |
| 456 (0x89 | 0x8b) 0xff # mov %edi,%edi | |
| 457 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi | |
| 458 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) | |
| 459 @{ | |
| 460 ExpandSuperinstructionBySandboxingBytes( | |
| 461 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); | |
| 462 } | | |
| 463 | |
| 464 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi | |
| 465 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi | |
| 466 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) | |
| 467 @{ | |
| 468 ExpandSuperinstructionBySandboxingBytes( | |
| 469 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); | |
| 470 }; | |
| 471 | |
| 472 | |
| 473 # "Superinstruction" which includes both %rsi and %rdi sandboxing. | |
| 474 # | |
| 475 # There are four variants which handle spurious REX prefixes. | |
| 476 # | |
| 477 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi" while both | |
| 478 # "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 there are two | |
| 479 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move | |
| 480 # from REG field to RM or in the other direction thus there are two encodings | |
| 481 # for the register-to-register move (and since REG and RM are identical here | |
| 482 # only opcode differs). | |
| 483 sandbox_instruction_rsi_rdi = | |
| 484 (0x89 | 0x8b) 0xf6 # mov %esi,%esi | |
| 485 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | |
| 486 (0x89 | 0x8b) 0xff # mov %edi,%edi | |
| 487 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi | |
| 488 string_instruction_rsi_rdi | |
| 489 @{ | |
| 490 ExpandSuperinstructionBySandboxingBytes( | |
| 491 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */, | |
| 492 &instruction_begin, data, valid_targets); | |
| 493 } | | |
| 494 | |
| 495 (((0x89 | 0x8b) 0xf6 # mov %esi,%esi | |
| 496 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | |
| 497 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi | |
| 498 0x49 0x8d 0x3c 0x3f) | # lea (%r15,%rdi,1),%rdi | |
| 499 | |
| 500 (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi | |
| 501 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | |
| 502 (0x89 | 0x8b) 0xff # mov %edi,%edi | |
| 503 0x49 0x8d 0x3c 0x3f)) # lea (%r15,%rdi,1),%rdi | |
| 504 string_instruction_rsi_rdi | |
| 505 @{ | |
| 506 ExpandSuperinstructionBySandboxingBytes( | |
| 507 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */ | |
| 508 /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */, | |
| 509 &instruction_begin, data, valid_targets); | |
| 510 } | | |
| 511 | |
| 512 REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi | |
| 513 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi | |
| 514 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi | |
| 515 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi | |
| 516 string_instruction_rsi_rdi | |
| 517 @{ | |
| 518 ExpandSuperinstructionBySandboxingBytes( | |
| 519 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */, | |
| 520 &instruction_begin, data, valid_targets); | |
| 521 }; | |
| 522 | |
| 523 # All the "special" instructions (== instructions which obey non-standard | |
| 524 # rules). Three groups: | |
| 525 # * %rbp/%rsp related instructions (these instructions are special because | |
| 526 # they must be in the range %r15...%r15+4294967295 except momentarily they | |
| 527 # can be in the range 0...4294967295) | |
| 528 # * string instructions (which can not use %r15 as base and thus need special | |
| 529 # handling both in compiler and validator) | |
| 530 # * naclcall/nacljmp (indirect jumps need special care) | |
| 531 special_instruction = | |
| 532 (rbp_modifications | | |
| 533 rsp_modifications | | |
| 534 rbp_sandboxing | | |
| 535 rsp_sandboxing | | |
| 536 sandbox_instruction_rsi_no_rdi | | |
| 537 sandbox_instruction_rdi_no_rsi | | |
| 538 sandbox_instruction_rsi_rdi | | |
| 539 naclcall_or_nacljmp) | |
| 540 # Mark the instruction as special - currently this information is used only | |
| 541 # in tests, but in the future we may use it for dynamic code modification | |
| 542 # support. | |
| 543 @{ | |
| 544 instruction_info_collected |= SPECIAL_INSTRUCTION; | |
| 545 }; | |
| 546 | |
| 547 # Remove special instructions which are only allowed in special cases. | |
| 548 normal_instruction = one_instruction - special_instruction; | |
| 549 | |
| 550 # Check if call is properly aligned. | |
| 551 # | |
| 552 # For direct call we explicitly encode all variations. For indirect call | |
| 553 # we accept all the special instructions which end with a register-addressed | |
| 554 # indirect call. | |
| 555 call_alignment = | |
| 556 ((normal_instruction & | |
| 557 # Direct call | |
| 558 ((data16 REX_RXB? 0xe8 rel16) | | |
| 559 (REX_WRXB? 0xe8 rel32) | | |
| 560 (data16 REXW_RXB 0xe8 rel32))) | | |
| 561 (special_instruction & | |
| 562 # Indirect call | |
| 563 (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* & | |
| 564 modrm_registers)))) | |
| 565 # Call instruction must be aligned to the end of bundle. Previously this was | |
| 566 # a strict requirement, today it's just a warning to aid with debugging. | |
| 567 @{ | |
| 568 if (((current_position - data) & kBundleMask) != kBundleMask) | |
| 569 instruction_info_collected |= BAD_CALL_ALIGNMENT; | |
| 570 }; | |
| 571 | |
| 572 # This action calls user's callback (if needed) and cleans up validator's | |
| 573 # internal state. | |
| 574 # | |
| 575 # We call the user callback if there are validation errors or if the | |
| 576 # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used. | |
| 577 # | |
| 578 # After that we move instruction_begin and clean all the variables which | |
| 579 # are only used in the processing of a single instruction (prefixes, operand | |
| 580 # states and instruction_info_collected). | |
| 581 action end_of_instruction_cleanup { | |
| 582 /* Call user-supplied callback. */ | |
| 583 instruction_end = current_position + 1; | |
| 584 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) || | |
| 585 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) { | |
| 586 result &= user_callback( | |
| 587 instruction_begin, instruction_end, | |
| 588 instruction_info_collected | | |
| 589 ((restricted_register << RESTRICTED_REGISTER_SHIFT) & | |
| 590 RESTRICTED_REGISTER_MASK), callback_data); | |
| 591 } | |
| 592 | |
| 593 /* On successful match the instruction_begin must point to the next byte | |
| 594 * to be able to report the new offset as the start of instruction | |
| 595 * causing error. */ | |
| 596 instruction_begin = instruction_end; | |
| 597 | |
| 598 /* Mark start of the next instruction as a valid target for jump. | |
| 599 * Note: we mark start of the next instruction here, not start of the | |
| 600 * current one because memory access check should be able to clear this | |
| 601 * bit when restricted register is used. */ | |
| 602 MarkValidJumpTarget(instruction_begin - data, valid_targets); | |
| 603 | |
| 604 /* Clear variables. */ | |
| 605 instruction_info_collected = 0; | |
| 606 SET_REX_PREFIX(FALSE); | |
| 607 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ | |
| 608 SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B); | |
| 609 SET_VEX_PREFIX3(0x00); | |
| 610 operand_states = 0; | |
| 611 base = 0; | |
| 612 index = 0; | |
| 613 } | |
| 614 | |
| 615 # This action reports fatal error detected by DFA. | |
| 616 action report_fatal_error { | |
| 617 result &= user_callback(instruction_begin, current_position, | |
| 618 UNRECOGNIZED_INSTRUCTION, callback_data); | |
| 619 /* | |
| 620 * Process the next bundle: "continue" here is for the "for" cycle in | |
| 621 * the ValidateChunkAMD64 function. | |
| 622 * | |
| 623 * It does not affect the case which we really care about (when code | |
| 624 * is validatable), but makes it possible to detect more errors in one | |
| 625 * run in tools like ncval. | |
| 626 */ | |
| 627 continue; | |
| 628 } | |
| 629 | |
| 630 # This is the main ragel machine: it does 99% of validation work. There is only | |
| 631 # one thing to do with a bundle if this machine accepts the bundle: | |
| 632 # * check for the state of the restricted_register at the end of the bundle. | |
| 633 # It's an error if %rbp or %rsp is restricted at the end of the bundle. | |
| 634 # Additionally if all the bundles are fine you need to check that direct jumps | |
| 635 # are correct. This is done in the following way: | |
| 636 # * DFA fills two arrays: valid_targets and jump_dests. | |
| 637 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0". | |
| 638 # All other checks are done here. | |
| 639 | |
| 640 main := ((call_alignment | normal_instruction | special_instruction) | |
| 641 @end_of_instruction_cleanup)* | |
| 642 $!report_fatal_error; | |
| 643 | |
| 644 }%% | |
| 645 | |
| 646 %% write data; | |
| 647 | |
| 648 enum OperandKind { /* Sandboxing class of one operand; the SET_OPERAND_FORMAT_* macros store it in operand_states. */ | |
| 649 OPERAND_SANDBOX_IRRELEVANT = 0, /* Value 0: what an operand byte holds when no format has been OR-ed into it. */ | |
| 650 /* | |
| 651 * Currently we do not distinguish 8bit and 16bit modifications from | |
| 652 * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator. | |
| 653 * | |
| 654 * 8bit operands must be distinguished from other types because the REX prefix | |
| 655 * regulates the choice between %ah and %spl, as well as %ch and %bpl. | |
| 656 */ | |
| 657 OPERAND_SANDBOX_8BIT, /* Selected for OPERAND_FORMAT_8_BIT. */ | |
| 658 OPERAND_SANDBOX_RESTRICTED, /* Selected for OPERAND_FORMAT_32_BIT. */ | |
| 659 OPERAND_SANDBOX_UNRESTRICTED /* Selected for OPERAND_FORMAT_16_BIT and OPERAND_FORMAT_64_BIT. */ | |
| 660 }; | |
| 661 | |
| 662 /* Each operand N owns one byte of operand_states (shift of N * 8); S is OR-ed into the low bits of that byte. */ #define SET_OPERAND_NAME(N, S) operand_states |= ((S) << ((N) << 3)) | |
| 663 /* Dispatches on the format token T by pasting it onto the SET_OPERAND_FORMAT_ prefix. */ #define SET_OPERAND_FORMAT(N, T) SET_OPERAND_FORMAT_ ## T(N) | |
| 664 /* The OperandKind is OR-ed into the same byte, 5 bits above the register name. */ #define SET_OPERAND_FORMAT_OPERAND_FORMAT_8_BIT(N) \ | |
| 665 operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((N) << 3)) | |
| 666 /* 16-bit operands are recorded as OPERAND_SANDBOX_UNRESTRICTED. */ #define SET_OPERAND_FORMAT_OPERAND_FORMAT_16_BIT(N) \ | |
| 667 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) | |
| 668 /* 32-bit operands are recorded as OPERAND_SANDBOX_RESTRICTED. */ #define SET_OPERAND_FORMAT_OPERAND_FORMAT_32_BIT(N) \ | |
| 669 operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3)) | |
| 670 /* 64-bit operands are recorded as OPERAND_SANDBOX_UNRESTRICTED. */ #define SET_OPERAND_FORMAT_OPERAND_FORMAT_64_BIT(N) \ | |
| 671 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) | |
| 672 /* True when the whole byte of operand N equals register S combined with kind T. NOTE(review): S and T are not parenthesized in the expansion, so only simple expressions are safe as arguments. */ #define CHECK_OPERAND(N, S, T) \ | |
| 673 ((operand_states & (0xff << ((N) << 3))) == ((S | (T << 5)) << ((N) << 3))) | |
| 674 | |
| 675 /* Classifies a memory access by its base and index registers and ORs the verdict into *instruction_info_collected. */ static INLINE void CheckAccess(ptrdiff_t instruction_begin, | |
| 676 enum OperandName base, | |
| 677 enum OperandName index, | |
| 678 uint8_t restricted_register, | |
| 679 bitmap_word *valid_targets, | |
| 680 uint32_t *instruction_info_collected) { /* Flags are only added here, never cleared. */ | |
| 681 if ((base == REG_RIP) || (base == REG_R15) || /* Only these four base registers are accepted. */ | |
| 682 (base == REG_RSP) || (base == REG_RBP)) { | |
| 683 if ((index == NO_REG) || (index == REG_RIZ)) | |
| 684 { /* No index register (NO_REG or REG_RIZ): nothing more to check. */ } | |
| 685 else if (index == restricted_register) /* The index is the currently restricted register. */ | |
| 686 BitmapClearBit(valid_targets, instruction_begin), /* Comma operator: this call and the next line form one statement of this branch; the bit at offset instruction_begin is cleared in valid_targets. */ | |
| 687 *instruction_info_collected |= RESTRICTED_REGISTER_USED; | |
| 688 else /* Any other index register is flagged. */ | |
| 689 *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER; | |
| 690 } else { /* Every other base register is flagged. */ | |
| 691 *instruction_info_collected |= FORBIDDEN_BASE_REGISTER; | |
| 692 } | |
| 693 } | |
| 694 | |
| 695 | |
| 696 static INLINE void Process0Operands(enum OperandName *restricted_register, | |
| 697 uint32_t *instruction_info_collected) { | |
| 698 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special | |
| 699 * instruction, not with regular instruction. */ | |
| 700 if (*restricted_register == REG_RSP) { | |
| 701 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | |
| 702 } else if (*restricted_register == REG_RBP) { | |
| 703 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | |
| 704 } | |
| 705 *restricted_register = NO_REG; | |
| 706 } | |
| 707 | |
| 708 static INLINE void Process1Operand(enum OperandName *restricted_register, | |
| 709 uint32_t *instruction_info_collected, | |
| 710 uint8_t rex_prefix, | |
| 711 uint32_t operand_states) { | |
| 712 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special | |
| 713 * instruction, not with regular instruction. */ | |
| 714 if (*restricted_register == REG_RSP) { | |
| 715 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | |
| 716 } else if (*restricted_register == REG_RBP) { | |
| 717 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | |
| 718 } | |
| 719 *restricted_register = NO_REG; | |
| 720 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || | |
| 721 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | |
| 722 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { | |
| 723 *instruction_info_collected |= R15_MODIFIED; | |
| 724 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
| 725 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || | |
| 726 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
| 727 *instruction_info_collected |= BPL_MODIFIED; | |
| 728 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
| 729 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || | |
| 730 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
| 731 *instruction_info_collected |= SPL_MODIFIED; | |
| 732 } | |
| 733 } | |
| 734 | |
| 735 static INLINE void Process1OperandZeroExtends( | |
| 736 enum OperandName *restricted_register, | |
| 737 uint32_t *instruction_info_collected, | |
| 738 uint8_t rex_prefix, | |
| 739 uint32_t operand_states) { | |
| 740 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special | |
| 741 * instruction, not with regular instruction. */ | |
| 742 if (*restricted_register == REG_RSP) { | |
| 743 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | |
| 744 } else if (*restricted_register == REG_RBP) { | |
| 745 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | |
| 746 } | |
| 747 *restricted_register = NO_REG; | |
| 748 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || | |
| 749 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | |
| 750 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { | |
| 751 *instruction_info_collected |= R15_MODIFIED; | |
| 752 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
| 753 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
| 754 *instruction_info_collected |= BPL_MODIFIED; | |
| 755 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
| 756 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
| 757 *instruction_info_collected |= SPL_MODIFIED; | |
| 758 /* Take 2 bits of operand type from operand_states as *restricted_register, | |
| 759 * make sure operand_states denotes a register (4th bit == 0). */ | |
| 760 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) { | |
| 761 *restricted_register = operand_states & 0x0f; | |
| 762 } | |
| 763 } | |
| 764 | |
| 765 static INLINE void Process2Operands(enum OperandName *restricted_register, | |
| 766 uint32_t *instruction_info_collected, | |
| 767 uint8_t rex_prefix, | |
| 768 uint32_t operand_states) { | |
| 769 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special | |
| 770 * instruction, not with regular instruction. */ | |
| 771 if (*restricted_register == REG_RSP) { | |
| 772 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | |
| 773 } else if (*restricted_register == REG_RBP) { | |
| 774 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | |
| 775 } | |
| 776 *restricted_register = NO_REG; | |
| 777 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || | |
| 778 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | |
| 779 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) || | |
| 780 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) || | |
| 781 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | |
| 782 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { | |
| 783 *instruction_info_collected |= R15_MODIFIED; | |
| 784 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
| 785 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || | |
| 786 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) || | |
| 787 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
| 788 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || | |
| 789 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
| 790 *instruction_info_collected |= BPL_MODIFIED; | |
| 791 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
| 792 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || | |
| 793 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) || | |
| 794 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
| 795 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || | |
| 796 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
| 797 *instruction_info_collected |= SPL_MODIFIED; | |
| 798 } | |
| 799 } | |
| 800 | |
| 801 static INLINE void Process2OperandsZeroExtends( | |
| 802 enum OperandName *restricted_register, | |
| 803 uint32_t *instruction_info_collected, | |
| 804 uint8_t rex_prefix, | |
| 805 uint32_t operand_states) { | |
| 806 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special | |
| 807 * instruction, not with regular instruction. */ | |
| 808 if (*restricted_register == REG_RSP) { | |
| 809 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | |
| 810 } else if (*restricted_register == REG_RBP) { | |
| 811 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | |
| 812 } | |
| 813 *restricted_register = NO_REG; | |
| 814 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || | |
| 815 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | |
| 816 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) || | |
| 817 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) || | |
| 818 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | |
| 819 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { | |
| 820 *instruction_info_collected |= R15_MODIFIED; | |
| 821 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
| 822 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) || | |
| 823 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
| 824 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
| 825 *instruction_info_collected |= BPL_MODIFIED; | |
| 826 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
| 827 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) || | |
| 828 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
| 829 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
| 830 *instruction_info_collected |= SPL_MODIFIED; | |
| 831 /* Take 2 bits of operand type from operand_states as *restricted_register, | |
| 832 * make sure operand_states denotes a register (4th bit == 0). */ | |
| 833 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) { | |
| 834 *restricted_register = operand_states & 0x0f; | |
| 835 if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED)) { | |
| 836 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | |
| 837 } else if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED)) { | |
| 838 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | |
| 839 } | |
| 840 /* Take 2 bits of operand type from operand_states as *restricted_register, | |
| 841 * make sure operand_states denotes a register (12th bit == 0). */ | |
| 842 } else if ((operand_states & 0x7000) == (OPERAND_SANDBOX_RESTRICTED << 13)) { | |
| 843 *restricted_register = (operand_states & 0x0f00) >> 8; | |
| 844 } | |
| 845 } | |
| 846 | |
| 847 /* | |
| 848 * This function merges "dangerous" instruction with sandboxing instructions to | |
| 849 * get a "superinstruction" and unmarks in-between jump targets. | |
| 850 */ | |
| 851 static INLINE void ExpandSuperinstructionBySandboxingBytes( | |
| 852 size_t sandbox_instructions_size, | |
| 853 const uint8_t **instruction_begin, | |
| 854 const uint8_t *data, | |
| 855 bitmap_word *valid_targets) { | |
| 856 *instruction_begin -= sandbox_instructions_size; | |
| 857 /* | |
| 858 * We need to unmark start of the "dangerous" instruction itself, too, but we | |
| 859 * don't need to mark the beginning of the whole "superinstruction" - that's | |
| 860 * why we move start by one byte and don't change the length. | |
| 861 */ | |
| 862 UnmarkValidJumpTargets((*instruction_begin + 1 - data), | |
| 863 sandbox_instructions_size, | |
| 864 valid_targets); | |
| 865 } | |
| 866 | |
| 867 /* | |
| 868 * Return TRUE if naclcall or nacljmp uses the same register in all three | |
| 869 * instructions. | |
| 870 * | |
| 871 * This version is for the case where "add %src_register, %dst_register" with | |
| 872 * dst in RM field and src in REG field of ModR/M byte is used. | |
| 873 * | |
| 874 * There are five possible forms: | |
| 875 * | |
| 876 * 0: 83 eX e0 and $~0x1f,E86 | |
| 877 * 3: 4? 01 fX add RBASE,R86 | |
| 878 * 6: ff eX jmpq *R86 | |
| 879 * ^ ^ | |
| 880 * instruction_begin current_position | |
| 881 * | |
| 882 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
| 883 * 4: 4? 01 fX add RBASE,R86 | |
| 884 * 7: ff eX jmpq *R86 | |
| 885 * ^ ^ | |
| 886 * instruction_begin current_position | |
| 887 * | |
| 888 * 0: 83 eX e0 and $~0x1f,E86 | |
| 889 * 3: 4? 01 fX add RBASE,R86 | |
| 890 * 6: 4? ff eX jmpq *R86 | |
| 891 * ^ ^ | |
| 892 * instruction_begin current_position | |
| 893 * | |
| 894 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
| 895 * 4: 4? 01 fX add RBASE,R86 | |
| 896 * 7: 4? ff eX jmpq *R86 | |
| 897 * ^ ^ | |
| 898 * instruction_begin current_position | |
| 899 * | |
| 900 * 0: 4? 83 eX e0 and $~0x1f,E64 | |
| 901 * 4: 4? 01 fX add RBASE,R64 | |
| 902 * 7: 4? ff eX jmpq *R64 | |
| 903 * ^ ^ | |
| 904 * instruction_begin current_position | |
| 905 * | |
| 906 * We don't care about "?" (they are checked by DFA). | |
| 907 */ | |
| 908 static INLINE Bool VerifyNaclCallOrJmpAddToRM(const uint8_t *instruction_begin, | |
| 909 const uint8_t *current_position) { | |
| 910 return | |
| 911 RMFromModRM(instruction_begin[-5]) == RMFromModRM(instruction_begin[-1]) && | |
| 912 RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]); | |
| 913 } | |
| 914 | |
| 915 /* | |
| 916 * Return TRUE if naclcall or nacljmp uses the same register in all three | |
| 917 * instructions. | |
| 918 * | |
| 919 * This version is for the case where "add %src_register, %dst_register" with | |
| 920 * dst in REG field and src in RM field of ModR/M byte is used. | |
| 921 * | |
| 922 * There are five possible forms: | |
| 923 * | |
| 924 * 0: 83 eX e0 and $~0x1f,E86 | |
| 925 * 3: 4? 03 Xf add RBASE,R86 | |
| 926 * 6: ff eX jmpq *R86 | |
| 927 * ^ ^ | |
| 928 * instruction_begin current_position | |
| 929 * | |
| 930 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
| 931 * 4: 4? 03 Xf add RBASE,R86 | |
| 932 * 7: ff eX jmpq *R86 | |
| 933 * ^ ^ | |
| 934 * instruction_begin current_position | |
| 935 * | |
| 936 * 0: 83 eX e0 and $~0x1f,E86 | |
| 937 * 3: 4? 03 Xf add RBASE,R86 | |
| 938 * 6: 4? ff eX jmpq *R86 | |
| 939 * ^ ^ | |
| 940 * instruction_begin current_position | |
| 941 * | |
| 942 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
| 943 * 4: 4? 03 Xf add RBASE,R86 | |
| 944 * 7: 4? ff eX jmpq *R86 | |
| 945 * ^ ^ | |
| 946 * instruction_begin current_position | |
| 947 * | |
| 948 * 0: 4? 83 eX e0 and $~0x1f,E64 | |
| 949 * 4: 4? 03 Xf add RBASE,R64 | |
| 950 * 7: 4? ff eX jmpq *R64 | |
| 951 * ^ ^ | |
| 952 * instruction_begin current_position | |
| 953 * | |
| 954 * We don't care about "?" (they are checked by DFA). | |
| 955 */ | |
| 956 static INLINE Bool VerifyNaclCallOrJmpAddToReg( | |
| 957 const uint8_t *instruction_begin, | |
| 958 const uint8_t *current_position) { | |
| 959 return | |
| 960 RMFromModRM(instruction_begin[-5]) == RegFromModRM(instruction_begin[-1]) && | |
| 961 RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]); | |
| 962 } | |
| 963 | |
| 964 /* | |
| 965 * This function checks that naclcall or nacljmp are correct (that is: three | |
| 966 * component instructions match) and if that is true then it merges call or jmp | |
| 967 * with a sandboxing to get a "superinstruction" and removes in-between jump | |
| 968 * targets. If it's not true then it triggers "unrecognized instruction" error | |
| 969 * condition. | |
| 970 * | |
| 971 * This version is for the case where "add with dst register in RM field" | |
| 972 * (opcode 0x01) and "add without REX prefix" is used. | |
| 973 * | |
| 974 * There are two possibile forms: | |
| 975 * | |
| 976 * 0: 83 eX e0 and $~0x1f,E86 | |
| 977 * 3: 4? 01 fX add RBASE,R86 | |
| 978 * 6: ff eX jmpq *R86 | |
| 979 * ^ ^ | |
| 980 * instruction_begin current_position | |
| 981 * | |
| 982 * 0: 83 eX e0 and $~0x1f,E86 | |
| 983 * 3: 4? 01 fX add RBASE,R86 | |
| 984 * 6: 4? ff eX jmpq *R86 | |
| 985 * ^ ^ | |
| 986 * instruction_begin current_position | |
| 987 */ | |
| 988 static INLINE void ProcessNaclCallOrJmpAddToRMNoRex( | |
| 989 uint32_t *instruction_info_collected, | |
| 990 const uint8_t **instruction_begin, | |
| 991 const uint8_t *current_position, | |
| 992 const uint8_t *data, | |
| 993 bitmap_word *valid_targets) { | |
| 994 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) | |
| 995 ExpandSuperinstructionBySandboxingBytes( | |
| 996 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); | |
| 997 else | |
| 998 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; | |
| 999 } | |
| 1000 | |
| 1001 /* | |
| 1002 * This function checks that naclcall or nacljmp are correct (that is: three | |
| 1003 * component instructions match) and if that is true then it merges call or jmp | |
| 1004 * with a sandboxing to get a "superinstruction" and removes in-between jump | |
| 1005 * targets. If it's not true then it triggers "unrecognized instruction" error | |
| 1006 * condition. | |
| 1007 * | |
| 1008 * This version is for the case where "add with dst register in REG field" | |
| 1009 * (opcode 0x03) and "add without REX prefix" is used. | |
| 1010 * | |
| 1011 * There are two possibile forms: | |
| 1012 * | |
| 1013 * 0: 83 eX e0 and $~0x1f,E86 | |
| 1014 * 3: 4? 03 Xf add RBASE,R86 | |
| 1015 * 6: ff eX jmpq *R86 | |
| 1016 * ^ ^ | |
| 1017 * instruction_begin current_position | |
| 1018 * | |
| 1019 * 0: 83 eX e0 and $~0x1f,E86 | |
| 1020 * 3: 4? 03 Xf add RBASE,R86 | |
| 1021 * 6: 4? ff eX jmpq *R86 | |
| 1022 * ^ ^ | |
| 1023 * instruction_begin current_position | |
| 1024 */ | |
| 1025 static INLINE void ProcessNaclCallOrJmpAddToRegNoRex( | |
| 1026 uint32_t *instruction_info_collected, | |
| 1027 const uint8_t **instruction_begin, | |
| 1028 const uint8_t *current_position, | |
| 1029 const uint8_t *data, | |
| 1030 bitmap_word *valid_targets) { | |
| 1031 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) | |
| 1032 ExpandSuperinstructionBySandboxingBytes( | |
| 1033 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); | |
| 1034 else | |
| 1035 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; | |
| 1036 } | |
| 1037 | |
| 1038 /* | |
| 1039 * This function checks that naclcall or nacljmp are correct (that is: three | |
| 1040 * component instructions match) and if that is true then it merges call or jmp | |
| 1041 * with a sandboxing to get a "superinstruction" and removes in-between jump | |
| 1042 * targets. If it's not true then it triggers "unrecognized instruction" error | |
| 1043 * condition. | |
| 1044 * | |
| 1045 * This version is for the case where "add with dst register in RM field" | |
| 1046 * (opcode 0x01) and "add without REX prefix" is used. | |
| 1047 * | |
| 1048 * There are three possibile forms: | |
| 1049 * | |
| 1050 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
| 1051 * 4: 4? 01 fX add RBASE,R86 | |
| 1052 * 7: ff eX jmpq *R86 | |
| 1053 * ^ ^ | |
| 1054 * instruction_begin current_position | |
| 1055 * | |
| 1056 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
| 1057 * 4: 4? 01 fX add RBASE,R86 | |
| 1058 * 7: 4? ff eX jmpq *R86 | |
| 1059 * ^ ^ | |
| 1060 * instruction_begin current_position | |
| 1061 * | |
| 1062 * 0: 4? 83 eX e0 and $~0x1f,E64 | |
| 1063 * 4: 4? 01 fX add RBASE,R64 | |
| 1064 * 7: 4? ff eX jmpq *R64 | |
| 1065 * ^ ^ | |
| 1066 * instruction_begin current_position | |
| 1067 */ | |
| 1068 static INLINE void ProcessNaclCallOrJmpAddToRMWithRex( | |
| 1069 uint32_t *instruction_info_collected, | |
| 1070 const uint8_t **instruction_begin, | |
| 1071 const uint8_t *current_position, | |
| 1072 const uint8_t *data, | |
| 1073 bitmap_word *valid_targets) { | |
| 1074 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) | |
| 1075 ExpandSuperinstructionBySandboxingBytes( | |
| 1076 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); | |
| 1077 else | |
| 1078 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; | |
| 1079 } | |
| 1080 | |
| 1081 /* | |
| 1082 * This function checks that naclcall or nacljmp are correct (that is: three | |
| 1083 * component instructions match) and if that is true then it merges call or jmp | |
| 1084 * with a sandboxing to get a "superinstruction" and removes in-between jump | |
| 1085 * targets. If it's not true then it triggers "unrecognized instruction" error | |
| 1086 * condition. | |
| 1087 * | |
| 1088 * This version is for the case where "add with dst register in REG field" | |
| 1089 * (opcode 0x03) and "add without REX prefix" is used. | |
| 1090 * | |
| 1091 * There are three possibile forms: | |
| 1092 * | |
| 1093 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
| 1094 * 4: 4? 03 Xf add RBASE,R86 | |
| 1095 * 7: ff eX jmpq *R86 | |
| 1096 * ^ ^ | |
| 1097 * instruction_begin current_position | |
| 1098 * | |
| 1099 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
| 1100 * 4: 4? 03 Xf add RBASE,R86 | |
| 1101 * 7: 4? ff eX jmpq *R86 | |
| 1102 * ^ ^ | |
| 1103 * instruction_begin current_position | |
| 1104 * | |
| 1105 * 0: 4? 83 eX e0 and $~0x1f,E64 | |
| 1106 * 4: 4? 03 Xf add RBASE,R64 | |
| 1107 * 7: 4? ff eX jmpq *R64 | |
| 1108 * ^ ^ | |
| 1109 * instruction_begin current_position | |
| 1110 */ | |
| 1111 static INLINE void ProcessNaclCallOrJmpAddToRegWithRex( | |
| 1112 uint32_t *instruction_info_collected, | |
| 1113 const uint8_t **instruction_begin, | |
| 1114 const uint8_t *current_position, | |
| 1115 const uint8_t *data, | |
| 1116 bitmap_word *valid_targets) { | |
| 1117 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) | |
| 1118 ExpandSuperinstructionBySandboxingBytes( | |
| 1119 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); | |
| 1120 else | |
| 1121 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; | |
| 1122 } | |
| 1123 | |
| 1124 | |
/*
 * Validates a chunk of amd64 code.
 *
 * data, size:     the code to validate; size must be a multiple of kBundleSize
 *                 (enforced with CHECK).
 * options:        bit flags; this function tests
 *                 PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM and feeds options to
 *                 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE.
 * cpu_features:   not referenced in the C code visible here; presumably used
 *                 by the DFA actions expanded at "write exec" - TODO confirm.
 * user_callback:  invoked here when a bundle ends with %rbp or %rsp still
 *                 restricted; also handed to ProcessInvalidJumpTargets.  Its
 *                 return value is ANDed into the overall result.
 * callback_data:  passed as the last argument to user_callback.
 *
 * Returns the accumulated result (it starts as TRUE and is ANDed with the
 * outcome of the checks made here).  On failure errno is set: ENOMEM if the
 * bitmaps could not be allocated, EINVAL if the result ended up false.
 */
Bool ValidateChunkAMD64(const uint8_t *data, size_t size,
                        uint32_t options,
                        const NaClCPUFeaturesX86 *cpu_features,
                        ValidationCallbackFunc user_callback,
                        void *callback_data) {
  /* Single-word bitmaps used instead of heap storage for tiny chunks. */
  bitmap_word valid_targets_small;
  bitmap_word jump_dests_small;
  bitmap_word *valid_targets;
  bitmap_word *jump_dests;
  const uint8_t *current_position;
  const uint8_t *end_of_bundle;
  int result = TRUE;

  CHECK(sizeof valid_targets_small == sizeof jump_dests_small);
  CHECK(size % kBundleSize == 0);

  /*
   * For very small sequences (one bundle) malloc is too expensive.
   *
   * Note1: we allocate one extra bit, because we set valid jump target bits
   * _after_ instructions, so there will be one at the end of the chunk.
   *
   * Note2: we don't ever mark first bit as a valid jump target but this is
   * not a problem because any aligned address is valid jump target.
   */
  if ((size + 1) <= (sizeof valid_targets_small * 8)) {
    valid_targets_small = 0;
    valid_targets = &valid_targets_small;
    jump_dests_small = 0;
    jump_dests = &jump_dests_small;
  } else {
    valid_targets = BitmapAllocate(size + 1);
    jump_dests = BitmapAllocate(size + 1);
    if (!valid_targets || !jump_dests) {
      /* Either pointer may be NULL here; free(NULL) is a no-op. */
      free(jump_dests);
      free(valid_targets);
      errno = ENOMEM;
      return FALSE;
    }
  }

  /*
   * This option is usually used in tests: we will process the whole chunk
   * in one pass. Usually each bundle is processed separately which means
   * instructions (and super-instructions) can not cross borders of the bundle.
   */
  if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)
    end_of_bundle = data + size;
  else
    end_of_bundle = data + kBundleSize;

  /*
   * Main loop. Here we process the data array bundle-after-bundle.
   * Ragel-produced DFA does all the checks with one exception: direct jumps.
   * It collects the two arrays: valid_targets and jump_dests which are used
   * to test direct jumps later.
   *
   * In contiguous-stream mode end_of_bundle already equals data + size, so
   * the loop body runs once.
   */
  for (current_position = data;
       current_position < data + size;
       current_position = end_of_bundle,
       end_of_bundle = current_position + kBundleSize) {
    /* Start of the instruction being processed. */
    const uint8_t *instruction_begin = current_position;
    /* Only used locally in the end_of_instruction_cleanup action. */
    const uint8_t *instruction_end;
    /* DFA state; bound to Ragel's "cs" by the machine definition. */
    int current_state;
    uint32_t instruction_info_collected = 0;
    /* Keeps one byte of information per operand in the current instruction:
     *  2 bits for register kinds,
     *  5 bits for register numbers (16 regs plus RIZ). */
    uint32_t operand_states = 0;
    enum OperandName base = NO_REG;
    enum OperandName index = NO_REG;
    enum OperandName restricted_register =
      EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options);
    uint8_t rex_prefix = FALSE;
    /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
    uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B;
    uint8_t vex_prefix3 = 0x00;

    /* Ragel directives: the generated DFA code is emitted at this point. */
    %% write init;
    %% write exec;

    /*
     * Ragel DFA accepted the bundle, but we still need to make sure the last
     * instruction hasn't left %rbp or %rsp in restricted state.
     */
    if (restricted_register == REG_RBP)
      result &= user_callback(end_of_bundle, end_of_bundle,
                              RESTRICTED_RBP_UNPROCESSED |
                              ((REG_RBP << RESTRICTED_REGISTER_SHIFT) &
                               RESTRICTED_REGISTER_MASK), callback_data);
    else if (restricted_register == REG_RSP)
      result &= user_callback(end_of_bundle, end_of_bundle,
                              RESTRICTED_RSP_UNPROCESSED |
                              ((REG_RSP << RESTRICTED_REGISTER_SHIFT) &
                               RESTRICTED_REGISTER_MASK), callback_data);
  }

  /*
   * Check the direct jumps.  All the targets from jump_dests must be in
   * valid_targets.
   */
  result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests,
                                      user_callback, callback_data);

  /*
   * We only use malloc for large code sequences; the small bitmaps are
   * locals of this function and must not be freed.
   */
  if (jump_dests != &jump_dests_small) free(jump_dests);
  if (valid_targets != &valid_targets_small) free(valid_targets);
  if (!result) errno = EINVAL;
  return result;
}
| OLD | NEW |