OLD | NEW |
(Empty) | |
/*
 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
| 6 |
| 7 #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H |
| 8 #define NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H |
| 9 |
/*
 * The SFI validator, and some utility classes it uses.
 */
| 13 |
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <vector>

#include "native_client/src/include/nacl_string.h"
#include "native_client/src/include/portability.h"
#include "native_client/src/trusted/validator_mips/address_set.h"
#include "native_client/src/trusted/validator_mips/decode.h"
#include "native_client/src/trusted/validator_mips/inst_classes.h"
#include "native_client/src/trusted/validator_mips/model.h"
| 24 |
| 25 namespace nacl_mips_val { |
| 26 |
/*
 * Forward declarations of classes that the validator uses by reference or by
 * pointer.  Their full definitions appear later in this file.
 */
class CodeSegment;
class DecodedInstruction;
class ProblemSink;
| 34 |
| 35 |
/*
 * A simple model of an instruction bundle: a contiguous range of virtual
 * addresses described by its base address and its size in bytes.  Bundles
 * consist of one or more instructions (two or more, in the useful case); the
 * precise size is controlled by the parameters passed into SfiValidator, below.
 */
class Bundle {
 public:
  /*
   * virtual_base: virtual address of the first byte of the bundle.
   * size_bytes:   size of the bundle, in bytes.
   */
  Bundle(uint32_t virtual_base, uint32_t size_bytes)
      : virtual_base_(virtual_base), size_(size_bytes) {}

  // Virtual address of the first byte of the bundle.
  uint32_t begin_addr() const { return virtual_base_; }
  // Virtual address one past the last byte of the bundle.  The sum is computed
  // in uint32_t, so it wraps modulo 2^32.
  uint32_t end_addr() const { return virtual_base_ + size_; }

  // Bundles are compared by base address only.
  // Note that all Bundles are currently assumed to be the same size.
  bool operator==(const Bundle &other) const {
    return virtual_base_ == other.virtual_base_;
  }
  bool operator!=(const Bundle &other) const {
    return !(*this == other);
  }

 private:
  uint32_t virtual_base_;
  uint32_t size_;
};
| 58 |
| 59 |
| 60 /* |
| 61 * The SFI validator itself. The validator is controlled by the following |
| 62 * inputs: |
| 63 * bytes_per_bundle: the number of bytes in each bundle of instructions. |
| 64 * Currently this tends to be 16, but we've evaluated alternatives. |
| 65 * code_region_bytes: number of bytes in the code region, starting at address |
| 66 * 0 and including the trampolines, etc. Must be a power of two. |
| 67 * data_region_bits: number of bytes in the data region, starting at address |
| 68 * 0 and including the code region. Must be a power of two. |
| 69 * read_only_registers: registers that untrusted code must not alter (but may |
| 70 * read). This currently applies to t6 - jump mask, t7 - load/store mask |
| 71 * and t8 - tls index. |
| 72 * data_address_registers: registers that must contain a valid data-region |
| 73 * address at all times. This currently applies to the stack pointer, but |
| 74 * could be extended to include a frame pointer for C-like languages. |
| 75 * |
| 76 * The values of these inputs will typically be taken from the headers of |
| 77 * untrusted code -- either by the ABI version they indicate, or (perhaps in |
| 78 * the future) explicit indicators of what SFI model they follow. |
| 79 */ |
| 80 class SfiValidator { |
| 81 public: |
| 82 SfiValidator(uint32_t bytes_per_bundle, |
| 83 uint32_t code_region_bytes, |
| 84 uint32_t data_region_bytes, |
| 85 nacl_mips_dec::RegisterList read_only_registers, |
| 86 nacl_mips_dec::RegisterList data_address_registers); |
| 87 |
| 88 /* |
| 89 * The main validator entry point. Validates the provided CodeSegments, |
| 90 * which must be in sorted order, reporting any problems through the |
| 91 * ProblemSink. |
| 92 * |
| 93 * Returns true iff no problems were found. |
| 94 */ |
| 95 bool validate(const std::vector<CodeSegment> &, ProblemSink *out); |
| 96 |
| 97 /* |
| 98 * Checks whether the given Register always holds a valid data region address. |
| 99 * This implies that the register is safe to use in unguarded stores. |
| 100 */ |
| 101 bool is_data_address_register(nacl_mips_dec::Register) const; |
| 102 |
| 103 uint32_t data_address_mask() const { return data_address_mask_; } |
| 104 uint32_t code_address_mask() const { return code_address_mask_; } |
| 105 uint32_t code_region_bytes() const { return code_region_bytes_; } |
| 106 uint32_t bytes_per_bundle() const { return bytes_per_bundle_; } |
| 107 uint32_t code_region_start() const { return code_region_start_; } |
| 108 uint32_t trampoline_region_start() const { return trampoline_region_start_; } |
| 109 |
| 110 nacl_mips_dec::RegisterList read_only_registers() const { |
| 111 return read_only_registers_; |
| 112 } |
| 113 nacl_mips_dec::RegisterList data_address_registers() const { |
| 114 return data_address_registers_; |
| 115 } |
| 116 |
| 117 // Returns the Bundle containing a given address. |
| 118 const Bundle bundle_for_address(uint32_t) const; |
| 119 |
| 120 /* |
| 121 * Change masks: this is useful for debugging and cannot be completely |
| 122 * controlled with constructor arguments |
| 123 */ |
| 124 void change_masks(uint32_t code_address_mask, uint32_t data_address_mask) { |
| 125 code_address_mask_ = code_address_mask; |
| 126 data_address_mask_ = data_address_mask; |
| 127 } |
| 128 |
| 129 /* |
| 130 * Find all the branch instructions which jump on the dest_address. |
| 131 * Returns a syscall on address 0 if no such instruction is found. |
| 132 */ |
| 133 bool find_branch(const std::vector<CodeSegment> &segments, |
| 134 const AddressSet &branches, |
| 135 uint32_t dest_address, |
| 136 std::vector<DecodedInstruction> &instrs) const; |
| 137 |
| 138 private: |
| 139 bool is_bundle_head(uint32_t address) const; |
| 140 |
| 141 /* |
| 142 * Validates a straight-line execution of the code, applying patterns. This |
| 143 * is the first validation pass, which fills out the AddressSets for |
| 144 * consumption by later analyses. |
| 145 * branches: gets filled in with the address of every direct branch. |
| 146 * branch_targets: gets filled in with the target address of every direct |
| 147 * branch. |
| 148 * critical: gets filled in with every address that isn't safe to jump to, |
| 149 * because it would split an otherwise-safe pseudo-op. |
| 150 * |
| 151 * Returns true iff no problems were found. |
| 152 */ |
| 153 bool validate_fallthrough(const CodeSegment &, ProblemSink *, |
| 154 AddressSet *branches, AddressSet *branch_targets, |
| 155 AddressSet *critical); |
| 156 |
| 157 /* |
| 158 * Factor of validate_fallthrough, above. Checks a single instruction using |
| 159 * the instruction patterns defined in the .cc file, with two possible |
| 160 * results: |
| 161 * 1. No patterns matched, or all were safe: nothing happens. |
| 162 * 2. Patterns matched and were unsafe: problems get sent to 'out'. |
| 163 */ |
| 164 bool apply_patterns(const DecodedInstruction &, ProblemSink *out); |
| 165 |
| 166 /* |
| 167 * Factor of validate_fallthrough, above. Checks a pair of instructions using |
| 168 * the instruction patterns defined in the .cc file, with three possible |
| 169 * results: |
| 170 * 1. No patterns matched: nothing happens. |
| 171 * 2. Patterns matched and were safe: the addresses are filled into |
| 172 * 'critical' for use by the second pass. |
| 173 * 3. Patterns matched and were unsafe: problems get sent to 'out'. |
| 174 */ |
| 175 bool apply_patterns(const DecodedInstruction &first, |
| 176 const DecodedInstruction &second, AddressSet *critical, ProblemSink *out); |
| 177 |
| 178 |
| 179 /* |
| 180 * 2nd pass - checks if some branch instruction tries to jump onto the middle |
| 181 * of the pseudoinstruction, and if some pseudoinstruction crosses bundle |
| 182 * borders. |
| 183 */ |
| 184 bool validate_pseudos(const SfiValidator &sfi, |
| 185 const std::vector<CodeSegment> &segments, |
| 186 const AddressSet &branches, |
| 187 const AddressSet &branch_targets, |
| 188 const AddressSet &critical, |
| 189 ProblemSink *out); |
| 190 |
| 191 uint32_t const bytes_per_bundle_; |
| 192 uint32_t const code_region_bytes_; |
| 193 uint32_t data_address_mask_; |
| 194 uint32_t code_address_mask_; |
| 195 |
| 196 // TODO(RT-RK): Think about pulling these values from some config header. |
| 197 static uint32_t const code_region_start_ = 0x20000; |
| 198 static uint32_t const trampoline_region_start_ = 0x10000; |
| 199 |
| 200 // Registers which cannot be modified by untrusted code. |
| 201 nacl_mips_dec::RegisterList read_only_registers_; |
| 202 // Registers which must always contain a valid data region address. |
| 203 nacl_mips_dec::RegisterList data_address_registers_; |
| 204 const nacl_mips_dec::DecoderState *decode_state_; |
| 205 }; |
| 206 |
| 207 |
| 208 /* |
| 209 * A facade that combines an Instruction with its address and a ClassDecoder. |
| 210 * This makes the patterns substantially easier to write and read than managing |
| 211 * all three variables separately. |
| 212 * |
| 213 * ClassDecoders do all decoding on-demand, with no caching. DecodedInstruction |
| 214 * has knowledge of the validator, and pairs a ClassDecoder with a constant |
| 215 * Instruction -- so it can cache commonly used values, and does. Caching |
| 216 * safety and defs doubles validator performance. Add other values only |
| 217 * under guidance of a profiler. |
| 218 */ |
| 219 class DecodedInstruction { |
| 220 public: |
| 221 DecodedInstruction(uint32_t vaddr, nacl_mips_dec::Instruction inst, |
| 222 const nacl_mips_dec::ClassDecoder &decoder); |
| 223 // We permit the default copy ctor and assignment operator. |
| 224 |
| 225 uint32_t addr() const { return vaddr_; } |
| 226 |
| 227 // The methods below mirror those on ClassDecoder, but are cached and cheap. |
| 228 nacl_mips_dec::SafetyLevel safety() const { return safety_; } |
| 229 |
| 230 // The methods below pull values from ClassDecoder on demand. |
| 231 const nacl_mips_dec::Register base_address_register() const { |
| 232 return decoder_->base_address_register(inst_); |
| 233 } |
| 234 |
| 235 nacl_mips_dec::Register dest_gpr_reg() const { |
| 236 return decoder_->dest_gpr_reg(inst_); |
| 237 } |
| 238 |
| 239 nacl_mips_dec::Register target_reg() const { |
| 240 return decoder_->target_reg(inst_); |
| 241 } |
| 242 |
| 243 uint32_t dest_addr() const { |
| 244 return decoder_->dest_addr(inst_, addr()); |
| 245 } |
| 246 |
| 247 bool has_delay_slot() const { |
| 248 return decoder_-> has_delay_slot(); |
| 249 } |
| 250 |
| 251 bool is_jal() const { |
| 252 return decoder_-> is_jal(); |
| 253 } |
| 254 |
| 255 bool is_mask(const nacl_mips_dec::Register& dest, |
| 256 const nacl_mips_dec::Register& mask) const { |
| 257 return decoder_->is_mask(inst_, dest, mask); |
| 258 } |
| 259 |
| 260 bool is_jmp_reg() const { |
| 261 return decoder_->is_jmp_reg(); |
| 262 } |
| 263 |
| 264 bool is_load_store() const { |
| 265 return decoder_->is_load_store(); |
| 266 } |
| 267 |
| 268 bool is_direct_jump() const { |
| 269 return decoder_->is_direct_jump(); |
| 270 } |
| 271 |
| 272 bool is_dest_gpr_reg(nacl_mips_dec::RegisterList rl) const { |
| 273 return rl.contains_any(dest_gpr_reg()); |
| 274 } |
| 275 |
| 276 bool is_data_reg_mask() const { |
| 277 return is_mask(dest_gpr_reg(), nacl_mips_dec::kRegisterLoadStoreMask); |
| 278 } |
| 279 |
| 280 private: |
| 281 uint32_t vaddr_; |
| 282 nacl_mips_dec::Instruction inst_; |
| 283 const nacl_mips_dec::ClassDecoder *decoder_; |
| 284 |
| 285 nacl_mips_dec::SafetyLevel safety_; |
| 286 }; |
| 287 |
| 288 |
| 289 /* |
| 290 * Describes a memory region that contains executable code. Note that the code |
| 291 * need not live in its final location -- we pretend the code lives at the |
| 292 * provided start_addr, regardless of where the base pointer actually points. |
| 293 */ |
| 294 class CodeSegment { |
| 295 public: |
| 296 CodeSegment(const uint8_t *base, uint32_t start_addr, size_t size) |
| 297 : base_(base), start_addr_(start_addr), size_(size) {} |
| 298 |
| 299 uint32_t begin_addr() const { return start_addr_; } |
| 300 uint32_t end_addr() const { return start_addr_ + size_; } |
| 301 uint32_t size() const { return size_; } |
| 302 bool contains_address(uint32_t a) const { |
| 303 return (a >= begin_addr()) && (a < end_addr()); |
| 304 } |
| 305 |
| 306 const nacl_mips_dec::Instruction operator[](uint32_t address) const { |
| 307 const uint8_t *element = &base_[address - start_addr_]; |
| 308 return nacl_mips_dec::Instruction( |
| 309 *reinterpret_cast<const uint32_t *>(element)); |
| 310 } |
| 311 |
| 312 bool operator<(const CodeSegment &other) const { |
| 313 return start_addr_ < other.start_addr_; |
| 314 } |
| 315 |
| 316 private: |
| 317 const uint8_t *base_; |
| 318 uint32_t start_addr_; |
| 319 size_t size_; |
| 320 }; |
| 321 |
| 322 |
| 323 /* |
| 324 * A class that consumes reports of validation problems, and may decide whether |
| 325 * to continue validating, or early-exit. |
| 326 * |
| 327 * In a sel_ldr context, we early-exit at the first problem we find. In an SDK |
| 328 * context, however, we collect more reports to give the developer feedback; |
| 329 * even then it may be desirable to exit after the first, say, 200 reports. |
| 330 */ |
| 331 class ProblemSink { |
| 332 public: |
| 333 virtual ~ProblemSink() {} |
| 334 |
| 335 /* |
| 336 * Reports a problem in untrusted code. |
| 337 * vaddr: the virtual address where the problem occurred. Note that this is |
| 338 * probably not the address of memory that contains the offending |
| 339 * instruction, since we allow CodeSegments to lie about their base |
| 340 * addresses. |
| 341 * safety: the safety level of the instruction, as reported by the decoder. |
| 342 * This may be MAY_BE_SAFE while still indicating a problem. |
| 343 * problem_code: a constant string, defined below, that uniquely identifies |
| 344 * the problem. These are not intended to be human-readable, and should |
| 345 * be looked up for localization and presentation to the developer. |
| 346 * ref_vaddr: A second virtual address of more code that affected the |
| 347 * decision -- typically a branch target. |
| 348 */ |
| 349 virtual void report_problem(uint32_t vaddr, nacl_mips_dec::SafetyLevel safety, |
| 350 const nacl::string &problem_code, uint32_t ref_vaddr = 0) { |
| 351 UNREFERENCED_PARAMETER(vaddr); |
| 352 UNREFERENCED_PARAMETER(safety); |
| 353 UNREFERENCED_PARAMETER(problem_code); |
| 354 UNREFERENCED_PARAMETER(ref_vaddr); |
| 355 } |
| 356 |
| 357 /* |
| 358 * Called after each invocation of report_problem. If this returns false, |
| 359 * the validator exits. |
| 360 */ |
| 361 virtual bool should_continue() { return false; } |
| 362 }; |
| 363 |
/*
 * Strings used to describe the current set of validator problems.  Each
 * constant's value is its own name.  These may be worth splitting into a
 * separate header file, so that dev tools can process them into localized
 * messages without needing to pull in the whole validator...we'll see.
 */

// An instruction is unsafe -- more information in the SafetyLevel.
const char * const kProblemUnsafe = "kProblemUnsafe";
// A branch would break a pseudo-operation pattern.
const char * const kProblemBranchSplitsPattern = "kProblemBranchSplitsPattern";
// A branch targets an invalid code address (out of segment).
const char * const kProblemBranchInvalidDest = "kProblemBranchInvalidDest";
// A load/store uses an unsafe (non-masked) base address.
const char * const kProblemUnsafeLoadStore = "kProblemUnsafeLoadStore";
// An instruction updates a data-address register (e.g. SP) without masking.
const char * const kProblemUnsafeDataWrite = "kProblemUnsafeDataWrite";
// An instruction updates a read-only register (e.g. t6, t7, t8).
const char * const kProblemReadOnlyRegister = "kProblemReadOnlyRegister";
// A pseudo-op pattern crosses a bundle boundary.
const char * const kProblemPatternCrossesBundle =
    "kProblemPatternCrossesBundle";
// A linking branch instruction is not in the last bundle slot.
const char * const kProblemMisalignedCall = "kProblemMisalignedCall";
// A data register is found in a branch delay slot.
const char * const kProblemDataRegInDelaySlot = "kProblemDataRegInDelaySlot";
// A jump to trampoline instruction which is not a start of a bundle.
const char * const kProblemUnalignedJumpToTrampoline =
    "kProblemUnalignedJumpToTrampoline";
// A jump register instruction is not guarded.
const char * const kProblemUnsafeJumpRegister = "kProblemUnsafeJumpRegister";
| 395 |
| 396 } // namespace |
| 397 |
| 398 #endif // NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H |
OLD | NEW |