Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(167)

Side by Side Diff: src/trusted/validator_mips/validator.h

Issue 9979025: [MIPS] Adding validator for MIPS architecture. (Closed) Base URL: http://src.chromium.org/native_client/trunk/src/native_client/
Patch Set: Update per initial code review. Nexes removed. Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
5 */
6
7 #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H
8 #define NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H
9
10 /*
11 * The SFI validator, and some utility classes it uses.
12 */
13
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <vector>

#include "native_client/src/include/nacl_string.h"
#include "native_client/src/include/portability.h"
#include "native_client/src/trusted/validator_mips/address_set.h"
#include "native_client/src/trusted/validator_mips/decode.h"
#include "native_client/src/trusted/validator_mips/inst_classes.h"
#include "native_client/src/trusted/validator_mips/model.h"
24
25 namespace nacl_mips_val {
26
27 /*
28 * Forward declarations of classes used by-reference in the validator, and
29 * defined at the end of this file.
30 */
class ProblemSink;         // Receives reports of validation problems.
class DecodedInstruction;  // An instruction paired with its address and decoder.
class CodeSegment;         // A memory region that contains executable code.
34
35
36 /*
37 * A simple model of an instruction bundle. Bundles consist of one or more
38 * instructions (two or more, in the useful case); the precise size is
39 * controlled by the parameters passed into SfiValidator, below.
40 */
class Bundle {
 public:
  // Describes the bundle that starts at |virtual_base| and covers
  // |size_bytes| bytes.
  Bundle(uint32_t virtual_base, uint32_t size_bytes)
      : virtual_base_(virtual_base), size_(size_bytes) {}

  // Address of the first byte of the bundle.
  uint32_t begin_addr() const { return virtual_base_; }
  // Address one past the last byte of the bundle.
  uint32_t end_addr() const { return virtual_base_ + size_; }

  // Bundles are compared by base address only: all Bundles are currently
  // assumed to be the same size, so size_ does not take part in comparison.
  bool operator==(const Bundle &other) const {
    return virtual_base_ == other.virtual_base_;
  }
  // Defined through operator== so the two can never disagree.
  bool operator!=(const Bundle &other) const { return !(*this == other); }

 private:
  uint32_t virtual_base_;
  uint32_t size_;
};
58
59
60 /*
61 * The SFI validator itself. The validator is controlled by the following
62 * inputs:
63 * bytes_per_bundle: the number of bytes in each bundle of instructions.
64 * Currently this tends to be 16, but we've evaluated alternatives.
65 * code_region_bytes: number of bytes in the code region, starting at address
66 * 0 and including the trampolines, etc. Must be a power of two.
67 * data_region_bytes: number of bytes in the data region, starting at address
68 * 0 and including the code region. Must be a power of two.
69 * read_only_registers: registers that untrusted code must not alter (but may
70 * read). This currently applies to t6 - jump mask, t7 - load/store mask
71 * and t8 - tls index.
72 * data_address_registers: registers that must contain a valid data-region
73 * address at all times. This currently applies to the stack pointer, but
74 * could be extended to include a frame pointer for C-like languages.
75 *
76 * The values of these inputs will typically be taken from the headers of
77 * untrusted code -- either by the ABI version they indicate, or (perhaps in
78 * the future) explicit indicators of what SFI model they follow.
79 */
80 class SfiValidator {
81 public:
82 SfiValidator(uint32_t bytes_per_bundle,
83 uint32_t code_region_bytes,
84 uint32_t data_region_bytes,
85 nacl_mips_dec::RegisterList read_only_registers,
86 nacl_mips_dec::RegisterList data_address_registers);
87
88 /*
89 * The main validator entry point. Validates the provided CodeSegments,
90 * which must be in sorted order, reporting any problems through the
91 * ProblemSink.
92 *
93 * Returns true iff no problems were found.
94 */
95 bool validate(const std::vector<CodeSegment> &, ProblemSink *out);
96
97 /*
98 * Checks whether the given Register always holds a valid data region address.
99 * This implies that the register is safe to use in unguarded stores.
100 */
101 bool is_data_address_register(nacl_mips_dec::Register) const;
102
103 uint32_t data_address_mask() const { return data_address_mask_; }
104 uint32_t code_address_mask() const { return code_address_mask_; }
105 uint32_t code_region_bytes() const { return code_region_bytes_; }
106 uint32_t bytes_per_bundle() const { return bytes_per_bundle_; }
107 uint32_t code_region_start() const { return code_region_start_; }
108 uint32_t trampoline_region_start() const { return trampoline_region_start_; }
109
110 nacl_mips_dec::RegisterList read_only_registers() const {
111 return read_only_registers_;
112 }
113 nacl_mips_dec::RegisterList data_address_registers() const {
114 return data_address_registers_;
115 }
116
117 // Returns the Bundle containing a given address.
118 const Bundle bundle_for_address(uint32_t) const;
119
120 /*
121 * Change masks: this is useful for debugging and cannot be completely
122 * controlled with constructor arguments
123 */
124 void change_masks(uint32_t code_address_mask, uint32_t data_address_mask) {
125 code_address_mask_ = code_address_mask;
126 data_address_mask_ = data_address_mask;
127 }
128
129 /*
130 * Find all the branch instructions which jump on the dest_address.
131 * Returns a syscall on address 0 if no such instruction is found.
132 */
133 bool find_branch(const std::vector<CodeSegment> &segments,
134 const AddressSet &branches,
135 uint32_t dest_address,
136 std::vector<DecodedInstruction> &instrs) const;
137
138 private:
139 bool is_bundle_head(uint32_t address) const;
140
141 /*
142 * Validates a straight-line execution of the code, applying patterns. This
143 * is the first validation pass, which fills out the AddressSets for
144 * consumption by later analyses.
145 * branches: gets filled in with the address of every direct branch.
146 * branch_targets: gets filled in with the target address of every direct
147 * branch.
148 * critical: gets filled in with every address that isn't safe to jump to,
149 * because it would split an otherwise-safe pseudo-op.
150 *
151 * Returns true iff no problems were found.
152 */
153 bool validate_fallthrough(const CodeSegment &, ProblemSink *,
154 AddressSet *branches, AddressSet *branch_targets,
155 AddressSet *critical);
156
157 /*
158 * Factor of validate_fallthrough, above. Checks a single instruction using
159 * the instruction patterns defined in the .cc file, with two possible
160 * results:
161 * 1. No patterns matched, or all were safe: nothing happens.
162 * 2. Patterns matched and were unsafe: problems get sent to 'out'.
163 */
164 bool apply_patterns(const DecodedInstruction &, ProblemSink *out);
165
166 /*
167 * Factor of validate_fallthrough, above. Checks a pair of instructions using
168 * the instruction patterns defined in the .cc file, with three possible
169 * results:
170 * 1. No patterns matched: nothing happens.
171 * 2. Patterns matched and were safe: the addresses are filled into
172 * 'critical' for use by the second pass.
173 * 3. Patterns matched and were unsafe: problems get sent to 'out'.
174 */
175 bool apply_patterns(const DecodedInstruction &first,
176 const DecodedInstruction &second, AddressSet *critical, ProblemSink *out);
177
178
179 /*
180 * 2nd pass - checks if some branch instruction tries to jump onto the middle
181 * of the pseudoinstruction, and if some pseudoinstruction crosses bundle
182 * borders.
183 */
184 bool validate_pseudos(const SfiValidator &sfi,
185 const std::vector<CodeSegment> &segments,
186 const AddressSet &branches,
187 const AddressSet &branch_targets,
188 const AddressSet &critical,
189 ProblemSink *out);
190
191 uint32_t const bytes_per_bundle_;
192 uint32_t const code_region_bytes_;
193 uint32_t data_address_mask_;
194 uint32_t code_address_mask_;
195
196 // TODO(RT-RK): Think about pulling these values from some config header.
197 static uint32_t const code_region_start_ = 0x20000;
198 static uint32_t const trampoline_region_start_ = 0x10000;
199
200 // Registers which cannot be modified by untrusted code.
201 nacl_mips_dec::RegisterList read_only_registers_;
202 // Registers which must always contain a valid data region address.
203 nacl_mips_dec::RegisterList data_address_registers_;
204 const nacl_mips_dec::DecoderState *decode_state_;
205 };
206
207
208 /*
209 * A facade that combines an Instruction with its address and a ClassDecoder.
210 * This makes the patterns substantially easier to write and read than managing
211 * all three variables separately.
212 *
213 * ClassDecoders do all decoding on-demand, with no caching. DecodedInstruction
214 * has knowledge of the validator, and pairs a ClassDecoder with a constant
215 * Instruction -- so it can cache commonly used values, and does. Caching
216 * safety and defs doubles validator performance. Add other values only
217 * under guidance of a profiler.
218 */
219 class DecodedInstruction {
220 public:
221 DecodedInstruction(uint32_t vaddr, nacl_mips_dec::Instruction inst,
222 const nacl_mips_dec::ClassDecoder &decoder);
223 // We permit the default copy ctor and assignment operator.
224
225 uint32_t addr() const { return vaddr_; }
226
227 // The methods below mirror those on ClassDecoder, but are cached and cheap.
228 nacl_mips_dec::SafetyLevel safety() const { return safety_; }
229
230 // The methods below pull values from ClassDecoder on demand.
231 const nacl_mips_dec::Register base_address_register() const {
232 return decoder_->base_address_register(inst_);
233 }
234
235 nacl_mips_dec::Register dest_gpr_reg() const {
236 return decoder_->dest_gpr_reg(inst_);
237 }
238
239 nacl_mips_dec::Register target_reg() const {
240 return decoder_->target_reg(inst_);
241 }
242
243 uint32_t dest_addr() const {
244 return decoder_->dest_addr(inst_, addr());
245 }
246
247 bool has_delay_slot() const {
248 return decoder_-> has_delay_slot();
249 }
250
251 bool is_jal() const {
252 return decoder_-> is_jal();
253 }
254
255 bool is_mask(const nacl_mips_dec::Register& dest,
256 const nacl_mips_dec::Register& mask) const {
257 return decoder_->is_mask(inst_, dest, mask);
258 }
259
260 bool is_jmp_reg() const {
261 return decoder_->is_jmp_reg();
262 }
263
264 bool is_load_store() const {
265 return decoder_->is_load_store();
266 }
267
268 bool is_direct_jump() const {
269 return decoder_->is_direct_jump();
270 }
271
272 bool is_dest_gpr_reg(nacl_mips_dec::RegisterList rl) const {
273 return rl.contains_any(dest_gpr_reg());
274 }
275
276 bool is_data_reg_mask() const {
277 return is_mask(dest_gpr_reg(), nacl_mips_dec::kRegisterLoadStoreMask);
278 }
279
280 private:
281 uint32_t vaddr_;
282 nacl_mips_dec::Instruction inst_;
283 const nacl_mips_dec::ClassDecoder *decoder_;
284
285 nacl_mips_dec::SafetyLevel safety_;
286 };
287
288
289 /*
290 * Describes a memory region that contains executable code. Note that the code
291 * need not live in its final location -- we pretend the code lives at the
292 * provided start_addr, regardless of where the base pointer actually points.
293 */
294 class CodeSegment {
295 public:
296 CodeSegment(const uint8_t *base, uint32_t start_addr, size_t size)
297 : base_(base), start_addr_(start_addr), size_(size) {}
298
299 uint32_t begin_addr() const { return start_addr_; }
300 uint32_t end_addr() const { return start_addr_ + size_; }
301 uint32_t size() const { return size_; }
302 bool contains_address(uint32_t a) const {
303 return (a >= begin_addr()) && (a < end_addr());
304 }
305
306 const nacl_mips_dec::Instruction operator[](uint32_t address) const {
307 const uint8_t *element = &base_[address - start_addr_];
308 return nacl_mips_dec::Instruction(
309 *reinterpret_cast<const uint32_t *>(element));
310 }
311
312 bool operator<(const CodeSegment &other) const {
313 return start_addr_ < other.start_addr_;
314 }
315
316 private:
317 const uint8_t *base_;
318 uint32_t start_addr_;
319 size_t size_;
320 };
321
322
323 /*
324 * A class that consumes reports of validation problems, and may decide whether
325 * to continue validating, or early-exit.
326 *
327 * In a sel_ldr context, we early-exit at the first problem we find. In an SDK
328 * context, however, we collect more reports to give the developer feedback;
329 * even then it may be desirable to exit after the first, say, 200 reports.
330 */
331 class ProblemSink {
332 public:
333 virtual ~ProblemSink() {}
334
335 /*
336 * Reports a problem in untrusted code.
337 * vaddr: the virtual address where the problem occurred. Note that this is
338 * probably not the address of memory that contains the offending
339 * instruction, since we allow CodeSegments to lie about their base
340 * addresses.
341 * safety: the safety level of the instruction, as reported by the decoder.
342 * This may be MAY_BE_SAFE while still indicating a problem.
343 * problem_code: a constant string, defined below, that uniquely identifies
344 * the problem. These are not intended to be human-readable, and should
345 * be looked up for localization and presentation to the developer.
346 * ref_vaddr: A second virtual address of more code that affected the
347 * decision -- typically a branch target.
348 */
349 virtual void report_problem(uint32_t vaddr, nacl_mips_dec::SafetyLevel safety,
350 const nacl::string &problem_code, uint32_t ref_vaddr = 0) {
351 UNREFERENCED_PARAMETER(vaddr);
352 UNREFERENCED_PARAMETER(safety);
353 UNREFERENCED_PARAMETER(problem_code);
354 UNREFERENCED_PARAMETER(ref_vaddr);
355 }
356
357 /*
358 * Called after each invocation of report_problem. If this returns false,
359 * the validator exits.
360 */
361 virtual bool should_continue() { return false; }
362 };
363
364 /*
365 * Strings used to describe the current set of validator problems. These may
366 * be worth splitting into a separate header file, so that dev tools can
367 * process them into localized messages without needing to pull in the whole
368 * validator...we'll see.
369 */
370
// Instruction is unsafe; the accompanying SafetyLevel carries the details.
const char *const kProblemUnsafe =
    "kProblemUnsafe";

// Branch would break a pseudo-operation pattern.
const char *const kProblemBranchSplitsPattern =
    "kProblemBranchSplitsPattern";

// Branch targets an invalid code address (out of segment).
const char *const kProblemBranchInvalidDest =
    "kProblemBranchInvalidDest";

// Load/store uses an unsafe (non-masked) base address.
const char *const kProblemUnsafeLoadStore =
    "kProblemUnsafeLoadStore";

// Instruction updates a data-address register (e.g. SP) without masking.
const char *const kProblemUnsafeDataWrite =
    "kProblemUnsafeDataWrite";

// Instruction updates a read-only register (e.g. t6, t7, t8).
const char *const kProblemReadOnlyRegister =
    "kProblemReadOnlyRegister";

// Pseudo-op pattern crosses a bundle boundary.
const char *const kProblemPatternCrossesBundle =
    "kProblemPatternCrossesBundle";

// Linking branch instruction is not in the last bundle slot.
const char *const kProblemMisalignedCall =
    "kProblemMisalignedCall";

// Data register is found in a branch delay slot.
const char *const kProblemDataRegInDelaySlot =
    "kProblemDataRegInDelaySlot";

// Jump to a trampoline instruction which is not the start of a bundle.
const char *const kProblemUnalignedJumpToTrampoline =
    "kProblemUnalignedJumpToTrampoline";

// Jump register instruction is not guarded.
const char *const kProblemUnsafeJumpRegister =
    "kProblemUnsafeJumpRegister";
395
396 } // namespace
397
398 #endif // NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698