OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. | |
3 * Use of this source code is governed by a BSD-style license that can be | |
4 * found in the LICENSE file. | |
5 */ | |
6 | |
7 #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H | |
8 #define NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H | |
9 | |
10 /* | |
11 * The SFI validator, and some utility classes it uses. | |
12 */ | |
13 | |
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <vector>

#include "native_client/src/include/nacl_string.h"
#include "native_client/src/include/portability.h"
#include "native_client/src/trusted/validator_mips/address_set.h"
#include "native_client/src/trusted/validator_mips/decode.h"
#include "native_client/src/trusted/validator_mips/inst_classes.h"
#include "native_client/src/trusted/validator_mips/model.h"
Brad Chen
2012/05/04 22:49:50
includes should generally be in alphabetical order
| |
24 | |
25 namespace nacl_mips_val { | |
26 | |
/*
 * Forward declarations for the classes that the validator interfaces below
 * take by reference or by pointer.  Their full definitions appear later in
 * this file.
 */
class CodeSegment;
class DecodedInstruction;
class ProblemSink;
34 | |
35 | |
/*
 * A simple model of an instruction bundle.  Bundles consist of one or more
 * instructions (two or more, in the useful case); the precise size is
 * controlled by the parameters passed into SfiValidator, below.
 *
 * A Bundle is a small value type: it records where the bundle starts and how
 * many bytes it spans, and can be copied freely.
 */
class Bundle {
 public:
  /*
   * virtual_base: virtual address of the first byte of the bundle.
   * size_bytes:   length of the bundle, in bytes.
   */
  Bundle(uint32_t virtual_base, uint32_t size_bytes)
      : virtual_base_(virtual_base), size_(size_bytes) {}

  // Virtual address of the first byte of the bundle.
  uint32_t begin_addr() const { return virtual_base_; }
  // Virtual address one past the last byte of the bundle.
  uint32_t end_addr() const { return virtual_base_ + size_; }

  /*
   * Two Bundles compare equal when they start at the same address.  The size
   * is deliberately not compared: all Bundles are currently assumed to be the
   * same size.
   */
  bool operator==(const Bundle &other) const {
    return virtual_base_ == other.virtual_base_;
  }

  // Defined in terms of operator== so the two can never disagree.
  bool operator!=(const Bundle &other) const {
    return !(*this == other);
  }

 private:
  uint32_t virtual_base_;
  uint32_t size_;
};
58 | |
59 | |
60 /* | |
61 * The SFI validator itself. The validator is controlled by the following | |
62 * inputs: | |
63 * bytes_per_bundle: the number of bytes in each bundle of instructions. | |
64 * Currently this tends to be 16, but we've evaluated alternatives. | |
65 * code_region_bytes: number of bytes in the code region, starting at address | |
66 * 0 and including the trampolines, etc. Must be a power of two. | |
67 * data_region_bits: number of bytes in the data region, starting at address | |
68 * 0 and including the code region. Must be a power of two. | |
69 * read_only_registers: registers that untrusted code must not alter (but may | |
70 * read). This currently applies to t6 - jump mask, t7 - load/store mask | |
71 * and t8 - tls index. | |
72 * data_address_registers: registers that must contain a valid data-region | |
73 * address at all times. This currently applies to the stack pointer, but | |
74 * could be extended to include a frame pointer for C-like languages. | |
75 * | |
76 * The values of these inputs will typically be taken from the headers of | |
77 * untrusted code -- either by the ABI version they indicate, or (perhaps in | |
78 * the future) explicit indicators of what SFI model they follow. | |
79 */ | |
80 class SfiValidator { | |
81 public: | |
82 SfiValidator(uint32_t bytes_per_bundle, | |
83 uint32_t code_region_bytes, | |
84 uint32_t data_region_bytes, | |
85 nacl_mips_dec::RegisterList read_only_registers, | |
86 nacl_mips_dec::RegisterList data_address_registers); | |
87 | |
88 /* | |
89 * The main validator entry point. Validates the provided CodeSegments, | |
90 * which must be in sorted order, reporting any problems through the | |
91 * ProblemSink. | |
92 * | |
93 * Returns true iff no problems were found. | |
94 */ | |
95 bool validate(const std::vector<CodeSegment> &, ProblemSink *out); | |
96 | |
97 /* | |
98 * Checks whether the given Register always holds a valid data region address. | |
99 * This implies that the register is safe to use in unguarded stores. | |
100 */ | |
101 bool is_data_address_register(nacl_mips_dec::Register) const; | |
102 | |
103 uint32_t data_address_mask() const { return data_address_mask_; } | |
104 uint32_t code_address_mask() const { return code_address_mask_; } | |
105 uint32_t code_region_bytes() const { return code_region_bytes_; } | |
106 uint32_t bytes_per_bundle() const { return bytes_per_bundle_; } | |
107 uint32_t code_region_start() const { return code_region_start_; } | |
108 uint32_t trampoline_region_start() const { return trampoline_region_start_; } | |
109 | |
110 nacl_mips_dec::RegisterList read_only_registers() const { | |
111 return read_only_registers_; | |
112 } | |
113 nacl_mips_dec::RegisterList data_address_registers() const { | |
114 return data_address_registers_; | |
115 } | |
116 | |
117 // Returns the Bundle containing a given address. | |
118 const Bundle bundle_for_address(uint32_t) const; | |
119 | |
120 /* | |
121 * Change masks: this is useful for debugging and cannot be completely | |
122 * controlled with constructor arguments | |
123 */ | |
124 void change_masks(uint32_t code_address_mask, uint32_t data_address_mask) { | |
125 code_address_mask_ = code_address_mask; | |
126 data_address_mask_ = data_address_mask; | |
127 } | |
128 | |
129 /* | |
130 * Find all the branch instructions which jump on the dest_address. | |
131 */ | |
132 bool find_branch(const std::vector<CodeSegment> &segments, | |
133 const AddressSet &branches, | |
134 uint32_t dest_address, | |
135 std::vector<DecodedInstruction> &instrs) const; | |
136 | |
137 private: | |
138 bool is_bundle_head(uint32_t address) const; | |
139 | |
140 /* | |
141 * Validates a straight-line execution of the code, applying patterns. This | |
142 * is the first validation pass, which fills out the AddressSets for | |
143 * consumption by later analyses. | |
144 * branches: gets filled in with the address of every direct branch. | |
145 * branch_targets: gets filled in with the target address of every direct | |
146 * branch. | |
147 * critical: gets filled in with every address that isn't safe to jump to, | |
148 * because it would split an otherwise-safe pseudo-op. | |
149 * | |
150 * Returns true iff no problems were found. | |
151 */ | |
152 bool validate_fallthrough(const CodeSegment &, ProblemSink *, | |
153 AddressSet *branches, AddressSet *branch_targets, | |
154 AddressSet *critical); | |
155 | |
156 /* | |
157 * Factor of validate_fallthrough, above. Checks a single instruction using | |
158 * the instruction patterns defined in the .cc file, with two possible | |
159 * results: | |
160 * 1. No patterns matched, or all were safe: nothing happens. | |
161 * 2. Patterns matched and were unsafe: problems get sent to 'out'. | |
162 */ | |
163 bool apply_patterns(const DecodedInstruction &, ProblemSink *out); | |
164 | |
165 /* | |
166 * Factor of validate_fallthrough, above. Checks a pair of instructions using | |
167 * the instruction patterns defined in the .cc file, with three possible | |
168 * results: | |
169 * 1. No patterns matched: nothing happens. | |
170 * 2. Patterns matched and were safe: the addresses are filled into | |
171 * 'critical' for use by the second pass. | |
172 * 3. Patterns matched and were unsafe: problems get sent to 'out'. | |
173 */ | |
174 bool apply_patterns(const DecodedInstruction &first, | |
175 const DecodedInstruction &second, AddressSet *critical, ProblemSink *out); | |
176 | |
177 | |
178 /* | |
179 * 2nd pass - checks if some branch instruction tries to jump onto the middle | |
180 * of the pseudo-instruction, and if some pseudo-instruction crosses bundle | |
181 * borders. | |
182 */ | |
183 bool validate_pseudos(const SfiValidator &sfi, | |
184 const std::vector<CodeSegment> &segments, | |
185 const AddressSet &branches, | |
186 const AddressSet &branch_targets, | |
187 const AddressSet &critical, | |
188 ProblemSink *out); | |
189 | |
190 uint32_t const bytes_per_bundle_; | |
191 uint32_t const code_region_bytes_; | |
192 uint32_t data_address_mask_; | |
193 uint32_t code_address_mask_; | |
194 | |
195 // TODO(RT-RK): Think about pulling these values from some config header. | |
196 static uint32_t const code_region_start_ = 0x20000; | |
197 static uint32_t const trampoline_region_start_ = 0x10000; | |
198 | |
199 // Registers which cannot be modified by untrusted code. | |
200 nacl_mips_dec::RegisterList read_only_registers_; | |
201 // Registers which must always contain a valid data region address. | |
202 nacl_mips_dec::RegisterList data_address_registers_; | |
203 const nacl_mips_dec::DecoderState *decode_state_; | |
204 }; | |
205 | |
206 | |
207 /* | |
208 * A facade that combines an Instruction with its address and a ClassDecoder. | |
209 * This makes the patterns substantially easier to write and read than managing | |
210 * all three variables separately. | |
211 * | |
212 * ClassDecoders do all decoding on-demand, with no caching. DecodedInstruction | |
213 * has knowledge of the validator, and pairs a ClassDecoder with a constant | |
214 * Instruction -- so it can cache commonly used values, and does. Caching | |
215 * safety and defs doubles validator performance. Add other values only | |
216 * under guidance of a profiler. | |
217 */ | |
218 class DecodedInstruction { | |
219 public: | |
220 DecodedInstruction(uint32_t vaddr, nacl_mips_dec::Instruction inst, | |
221 const nacl_mips_dec::ClassDecoder &decoder); | |
222 // We permit the default copy ctor and assignment operator. | |
223 | |
224 uint32_t addr() const { return vaddr_; } | |
225 | |
226 // The methods below mirror those on ClassDecoder, but are cached and cheap. | |
227 nacl_mips_dec::SafetyLevel safety() const { return safety_; } | |
228 | |
229 // The methods below pull values from ClassDecoder on demand. | |
230 const nacl_mips_dec::Register base_address_register() const { | |
231 return decoder_->base_address_register(inst_); | |
232 } | |
233 | |
234 nacl_mips_dec::Register dest_gpr_reg() const { | |
235 return decoder_->dest_gpr_reg(inst_); | |
236 } | |
237 | |
238 nacl_mips_dec::Register target_reg() const { | |
239 return decoder_->target_reg(inst_); | |
240 } | |
241 | |
242 uint32_t dest_addr() const { | |
243 return decoder_->dest_addr(inst_, addr()); | |
244 } | |
245 | |
246 bool has_delay_slot() const { | |
247 return decoder_-> has_delay_slot(); | |
248 } | |
249 | |
250 bool is_jal() const { | |
251 return decoder_-> is_jal(); | |
252 } | |
253 | |
254 bool is_mask(const nacl_mips_dec::Register& dest, | |
255 const nacl_mips_dec::Register& mask) const { | |
256 return decoder_->is_mask(inst_, dest, mask); | |
257 } | |
258 | |
259 bool is_jmp_reg() const { | |
260 return decoder_->is_jmp_reg(); | |
261 } | |
262 | |
263 bool is_load_store() const { | |
264 return decoder_->is_load_store(); | |
265 } | |
266 | |
267 bool is_direct_jump() const { | |
268 return decoder_->is_direct_jump(); | |
269 } | |
270 | |
271 bool is_dest_gpr_reg(nacl_mips_dec::RegisterList rl) const { | |
272 return rl.contains_any(dest_gpr_reg()); | |
273 } | |
274 | |
275 bool is_data_reg_mask() const { | |
276 return is_mask(dest_gpr_reg(), nacl_mips_dec::kRegisterLoadStoreMask); | |
277 } | |
278 | |
279 private: | |
280 uint32_t vaddr_; | |
281 nacl_mips_dec::Instruction inst_; | |
282 const nacl_mips_dec::ClassDecoder *decoder_; | |
283 | |
284 nacl_mips_dec::SafetyLevel safety_; | |
285 }; | |
286 | |
287 | |
288 /* | |
289 * Describes a memory region that contains executable code. Note that the code | |
290 * need not live in its final location -- we pretend the code lives at the | |
291 * provided start_addr, regardless of where the base pointer actually points. | |
292 */ | |
293 class CodeSegment { | |
294 public: | |
295 CodeSegment(const uint8_t *base, uint32_t start_addr, size_t size) | |
296 : base_(base), start_addr_(start_addr), size_(size) {} | |
297 | |
298 uint32_t begin_addr() const { return start_addr_; } | |
299 uint32_t end_addr() const { return start_addr_ + size_; } | |
300 uint32_t size() const { return size_; } | |
301 bool contains_address(uint32_t a) const { | |
302 return (a >= begin_addr()) && (a < end_addr()); | |
303 } | |
304 | |
305 const nacl_mips_dec::Instruction operator[](uint32_t address) const { | |
306 const uint8_t *element = &base_[address - start_addr_]; | |
307 return nacl_mips_dec::Instruction( | |
308 *reinterpret_cast<const uint32_t *>(element)); | |
309 } | |
310 | |
311 bool operator<(const CodeSegment &other) const { | |
312 return start_addr_ < other.start_addr_; | |
313 } | |
314 | |
315 private: | |
316 const uint8_t *base_; | |
317 uint32_t start_addr_; | |
318 size_t size_; | |
319 }; | |
320 | |
321 | |
322 /* | |
323 * A class that consumes reports of validation problems, and may decide whether | |
324 * to continue validating, or early-exit. | |
325 * | |
326 * In a sel_ldr context, we early-exit at the first problem we find. In an SDK | |
327 * context, however, we collect more reports to give the developer feedback; | |
328 * even then it may be desirable to exit after the first, say, 200 reports. | |
329 */ | |
330 class ProblemSink { | |
331 public: | |
332 virtual ~ProblemSink() {} | |
333 | |
334 /* | |
335 * Reports a problem in untrusted code. | |
336 * vaddr: the virtual address where the problem occurred. Note that this is | |
337 * probably not the address of memory that contains the offending | |
338 * instruction, since we allow CodeSegments to lie about their base | |
339 * addresses. | |
340 * safety: the safety level of the instruction, as reported by the decoder. | |
341 * This may be MAY_BE_SAFE while still indicating a problem. | |
342 * problem_code: a constant string, defined below, that uniquely identifies | |
343 * the problem. These are not intended to be human-readable, and should | |
344 * be looked up for localization and presentation to the developer. | |
345 * ref_vaddr: A second virtual address of more code that affected the | |
346 * decision -- typically a branch target. | |
347 */ | |
348 virtual void report_problem(uint32_t vaddr, nacl_mips_dec::SafetyLevel safety, | |
349 const nacl::string &problem_code, uint32_t ref_vaddr = 0) { | |
350 UNREFERENCED_PARAMETER(vaddr); | |
351 UNREFERENCED_PARAMETER(safety); | |
352 UNREFERENCED_PARAMETER(problem_code); | |
353 UNREFERENCED_PARAMETER(ref_vaddr); | |
354 } | |
355 | |
356 /* | |
357 * Called after each invocation of report_problem. If this returns false, | |
358 * the validator exits. | |
359 */ | |
360 virtual bool should_continue() { return false; } | |
361 }; | |
362 | |
/*
 * Problem codes: the strings passed as 'problem_code' when a validation
 * problem is reported.  Each constant's value is simply its own name.  These
 * may be worth splitting into a separate header file, so that dev tools can
 * process them into localized messages without needing to pull in the whole
 * validator...we'll see.
 */

// The instruction itself is unsafe; the SafetyLevel carries the details.
const char * const kProblemUnsafe = "kProblemUnsafe";

// A branch would break a pseudo-operation pattern.
const char * const kProblemBranchSplitsPattern = "kProblemBranchSplitsPattern";

// A branch targets an invalid code address (out of segment).
const char * const kProblemBranchInvalidDest = "kProblemBranchInvalidDest";

// A load/store uses an unsafe (non-masked) base address.
const char * const kProblemUnsafeLoadStore = "kProblemUnsafeLoadStore";

// An instruction updates a data-address register (e.g. SP) without masking.
const char * const kProblemUnsafeDataWrite = "kProblemUnsafeDataWrite";

// An instruction updates a read-only register (e.g. t6, t7, t8).
const char * const kProblemReadOnlyRegister = "kProblemReadOnlyRegister";

// A pseudo-op pattern crosses a bundle boundary.
const char * const kProblemPatternCrossesBundle = "kProblemPatternCrossesBundle";

// A linking branch instruction is not in the last bundle slot.
const char * const kProblemMisalignedCall = "kProblemMisalignedCall";

// A data register is found in a branch delay slot.
const char * const kProblemDataRegInDelaySlot = "kProblemDataRegInDelaySlot";

// A jump-to-trampoline instruction that does not target the start of a bundle.
const char * const kProblemUnalignedJumpToTrampoline =
    "kProblemUnalignedJumpToTrampoline";

// A jump-register instruction is not guarded.
const char * const kProblemUnsafeJumpRegister = "kProblemUnsafeJumpRegister";
394 | |
395 } // namespace | |
396 | |
397 #endif // NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H | |
OLD | NEW |