OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "sandbox/linux/seccomp-bpf/codegen.h" | 5 #include "sandbox/linux/seccomp-bpf/codegen.h" |
6 | 6 |
7 #include <linux/filter.h> | 7 #include <linux/filter.h> |
8 | 8 |
9 #include <set> | 9 #include <utility> |
10 | 10 |
11 #include "base/logging.h" | 11 #include "base/logging.h" |
12 #include "sandbox/linux/seccomp-bpf/basicblock.h" | |
13 #include "sandbox/linux/seccomp-bpf/die.h" | |
14 #include "sandbox/linux/seccomp-bpf/instruction.h" | |
15 | 12 |
16 namespace sandbox { | 13 namespace sandbox { |
17 | 14 |
18 // Unfortunately this needs to be defined out-of-line because inline | 15 const CodeGen::Addr CodeGen::kNullAddr; |
19 // initializing a static member to "nullptr" requires "constexpr", | |
20 // which is currently banned by the Chromium style guide. | |
21 const CodeGen::Addr CodeGen::kNullAddr = nullptr; | |
22 | 16 |
23 CodeGen::CodeGen() : compiled_(false) {} | 17 CodeGen::CodeGen() : program_(), cache_() { |
18 } | |
19 CodeGen::~CodeGen() { | |
20 } | |
24 | 21 |
25 CodeGen::~CodeGen() { | 22 void CodeGen::Compile(CodeGen::Addr head, Program* out) { |
26 for (Instructions::iterator iter = instructions_.begin(); | 23 out->assign(program_.rbegin() + Offset(head), program_.rend()); |
rickyz (no longer on Chrome), 2014/11/12 23:43:50:
Will head always end up being the most recently added instruction? …
mdempsky, 2014/11/18 01:07:30:
In practice it will be, but I feel a bit safer making …
| |
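Context for this thread (a sketch, not part of the patch): in the new design, program_ holds instructions in reverse execution order, an Addr is the index at which an instruction was pushed, and Offset(addr) is its distance from the newest entry. Compile()'s reverse copy therefore restores forward order starting at head and simply skips anything pushed after head, so head need not be the last instruction added. A minimal standalone model, with plain ints standing in for sock_filter:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Toy model (hypothetical values): instructions are pushed in reverse
// execution order, an "Addr" is the push index, and
// Offset(addr) == (size - 1) - addr measures distance from the newest entry.
int main() {
  std::vector<int> program = {30, 20, 10};  // 30 pushed first, executes last
  std::size_t head = 1;                     // Addr of instruction "20"
  std::size_t offset = (program.size() - 1) - head;  // == 1
  // The reverse copy in Compile(): starts at head, restores execution order,
  // and skips entries pushed after head (here the unreachable "10").
  std::vector<int> out(program.rbegin() + offset, program.rend());
  assert(out.size() == 2 && out[0] == 20 && out[1] == 30);
}
```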
27 iter != instructions_.end(); | |
28 ++iter) { | |
29 delete *iter; | |
30 } | |
31 for (BasicBlocks::iterator iter = basic_blocks_.begin(); | |
32 iter != basic_blocks_.end(); | |
33 ++iter) { | |
34 delete *iter; | |
35 } | |
36 } | 24 } |
37 | 25 |
38 CodeGen::Addr CodeGen::MakeInstruction(uint16_t code, | 26 CodeGen::Addr CodeGen::MakeInstruction(uint16_t code, |
39 uint32_t k, | 27 uint32_t k, |
40 Addr jt, | 28 Addr jt, |
41 Addr jf) { | 29 Addr jf) { |
42 Instruction* insn; | |
43 if (BPF_CLASS(code) == BPF_JMP) { | 30 if (BPF_CLASS(code) == BPF_JMP) { |
44 CHECK_NE(kNullAddr, jt); | 31 CHECK_NE(BPF_JA, BPF_OP(code)); |
45 if (BPF_OP(code) == BPF_JA) { | 32 CheckValid(jt); |
46 CHECK_EQ(kNullAddr, jf); | 33 CheckValid(jf); |
47 } else { | |
48 CHECK_NE(kNullAddr, jf); | |
49 } | |
50 insn = new Instruction(code, k, jt, jf); | |
51 } else { | 34 } else { |
52 if (BPF_CLASS(code) == BPF_RET) { | 35 if (BPF_CLASS(code) == BPF_RET) { |
53 CHECK_EQ(kNullAddr, jt); | 36 CHECK_EQ(kNullAddr, jt); |
54 } else { | 37 } else { |
55 CHECK_NE(kNullAddr, jt); | 38 CheckValid(jt); |
56 } | 39 } |
57 CHECK_EQ(kNullAddr, jf); | 40 CHECK_EQ(kNullAddr, jf); |
58 insn = new Instruction(code, k, jt); | |
59 } | 41 } |
60 instructions_.push_back(insn); | 42 |
61 return insn; | 43 // Look up the cache entry, inserting a new one if necessary. |
44 auto it = cache_.insert(std::make_pair(CacheKey(code, k, jt, jf), kNullAddr)); | |
45 CodeGen::Addr& res = it.first->second; | |
46 const bool miss = it.second; | |
47 | |
48 if (miss) { | |
49 if (BPF_CLASS(code) == BPF_JMP) { | |
50 // Ensure branch targets are within range. | |
51 if (Offset(jt) >= 255) { | |
rickyz (no longer on Chrome), 2014/11/12 23:43:50:
Should this be 256?
mdempsky, 2014/11/18 01:07:30:
Sort of. It was 255 so I wouldn't need to worry about …
| |
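One way to read the 255-vs-256 asymmetry (inferred from the code above, not the reviewers' words): sock_filter's jt and jf fields are 8 bits wide, so a conditional branch can skip at most 255 instructions, while an unconditional BPF_JA keeps its offset in the 32-bit k field. A trampoline emitted for one branch pushes the other branch's target out by one slot; jt is checked first against the stricter 255 so that a later jf trampoline cannot push it past the limit, while jf, checked afterwards, can use the full 256. A toy model of that arithmetic:

```cpp
#include <cassert>
#include <cstddef>

// Toy model of the offset arithmetic in the new MakeInstruction (no real
// BPF here): jt is trampolined at >= 255 because a later jf trampoline
// would push an offset of exactly 255 out to 256, past the 8-bit limit.
int main() {
  std::size_t jt_off = 255;  // just outside the safe range for jt
  std::size_t jf_off = 300;  // far out of range for jf
  if (jt_off >= 255) {
    jt_off = 0;  // conditional jump now targets a fresh BPF_JA trampoline
    ++jf_off;    // the appended trampoline moves every other target out by 1
  }
  if (jf_off >= 256) {
    jf_off = 0;  // second trampoline, this one for the false branch
    ++jt_off;    // which bumps the jt trampoline to offset 1; still in range
  }
  assert(jt_off <= 255 && jf_off <= 255);  // both now fit in 8 bits
}
```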
52 jt = Jump(jt); | |
53 } | |
54 if (Offset(jf) >= 256) { | |
55 jf = Jump(jf); | |
56 } | |
57 res = Append(code, k, Offset(jt), Offset(jf)); | |
58 } else { | |
59 if (BPF_CLASS(code) != BPF_RET && Offset(jt) != 0) { | |
60 jt = Jump(jt); | |
61 CHECK_EQ(0U, Offset(jt)); | |
62 } | |
63 res = Append(code, k, 0, 0); | |
64 } | |
65 } | |
66 return res; | |
62 } | 67 } |
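A note on the insert idiom in the new MakeInstruction: std::map::insert returns a pair of (iterator to the entry, bool that is true only on a miss), so a single call both probes the cache and reserves a slot for the value about to be computed. A self-contained illustration with hypothetical key/value types:

```cpp
#include <cassert>
#include <map>
#include <utility>

// Sketch of the map::insert idiom used above: insert() returns a pair of
// (iterator to the entry, bool that is true only if a new entry was created),
// so one lookup both finds an existing value and reserves a slot on a miss.
int main() {
  std::map<int, int> cache;
  auto it = cache.insert(std::make_pair(42, -1));
  assert(it.second);      // miss: a placeholder entry was created
  it.first->second = 7;   // fill in the computed value
  auto again = cache.insert(std::make_pair(42, -1));
  assert(!again.second);  // hit: the existing entry is returned untouched
  assert(again.first->second == 7);
}
```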
63 | 68 |
64 void CodeGen::FindBranchTargets(const Instruction& instructions, | 69 size_t CodeGen::Offset(Addr target) const { |
65 BranchTargets* branch_targets) { | 70 CheckValid(target); |
66 // Follow all possible paths through the "instructions" graph and compute | 71 return (program_.size() - 1) - target; |
67 // a list of branch targets. This will later be needed to compute the | |
68 // boundaries of basic blocks. | |
69 // We maintain a set of all instructions that we have previously seen. This | |
70 // set ultimately converges on all instructions in the program. | |
71 std::set<const Instruction*> seen_instructions; | |
72 Instructions stack; | |
73 for (const Instruction* insn = &instructions; insn;) { | |
74 seen_instructions.insert(insn); | |
75 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
76 // Found a jump. Increase count of incoming edges for each of the jump | |
77 // targets. | |
78 ++(*branch_targets)[insn->jt_ptr]; | |
79 if (BPF_OP(insn->code) != BPF_JA) { | |
80 ++(*branch_targets)[insn->jf_ptr]; | |
81 stack.push_back(const_cast<Instruction*>(insn)); | |
82 } | |
83 // Start a recursive descent for depth-first traversal. | |
84 if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) { | |
85 // We haven't seen the "true" branch yet. Traverse it now. We have | |
86 // already remembered the "false" branch on the stack and will | |
87 // traverse it later. | |
88 insn = insn->jt_ptr; | |
89 continue; | |
90 } else { | |
91 // Now try traversing the "false" branch. | |
92 insn = NULL; | |
93 } | |
94 } else { | |
95 // This is a non-jump instruction, just continue to the next instruction | |
96 // (if any). It's OK if "insn" becomes NULL when reaching a return | |
97 // instruction. | |
98 if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) { | |
99 SANDBOX_DIE( | |
100 "Internal compiler error; return instruction must be at " | |
101 "the end of the BPF program"); | |
102 } | |
103 if (seen_instructions.find(insn->next) == seen_instructions.end()) { | |
104 insn = insn->next; | |
105 } else { | |
106 // We have seen this instruction before. That could happen if it is | |
107 // a branch target. No need to continue processing. | |
108 insn = NULL; | |
109 } | |
110 } | |
111 while (!insn && !stack.empty()) { | |
112 // We are done processing all the way to a leaf node, backtrack up the | |
113 // stack to any branches that we haven't processed yet. By definition, | |
114 // this has to be a "false" branch, as we always process the "true" | |
115 // branches right away. | |
116 insn = stack.back(); | |
117 stack.pop_back(); | |
118 if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) { | |
119 // We haven't seen the "false" branch yet. So, that's where we'll | |
120 // go now. | |
121 insn = insn->jf_ptr; | |
122 } else { | |
123 // We have seen both the "true" and the "false" branch, continue | |
124 // up the stack. | |
125 if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) { | |
126 SANDBOX_DIE( | |
127 "Internal compiler error; cannot find all " | |
128 "branch targets"); | |
129 } | |
130 insn = NULL; | |
131 } | |
132 } | |
133 } | |
134 return; | |
135 } | 72 } |
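The deleted traversal above is a depth-first walk that counts how many edges enter each instruction; those counts later mark basic-block boundaries. A stripped-down equivalent over an invented adjacency list, for readers skimming the removal:

```cpp
#include <cassert>
#include <map>
#include <vector>

// DFS with an explicit stack, counting incoming edges per node: the same
// bookkeeping the old FindBranchTargets kept in branch_targets.
std::map<int, int> CountIncoming(const std::map<int, std::vector<int>>& succ,
                                 int entry) {
  std::map<int, int> incoming;
  std::map<int, bool> seen;
  std::vector<int> stack = {entry};
  while (!stack.empty()) {
    int n = stack.back();
    stack.pop_back();
    if (seen[n])
      continue;  // already expanded; its outgoing edges were counted once
    seen[n] = true;
    for (int s : succ.at(n)) {
      ++incoming[s];  // every edge into s counts, even edges to seen nodes
      stack.push_back(s);
    }
  }
  return incoming;
}

int main() {
  // Diamond graph: 0 -> {1, 2}; 1 -> {3}; 2 -> {3}; 3 -> {}.
  std::map<int, std::vector<int>> g = {{0, {1, 2}}, {1, {3}}, {2, {3}}, {3, {}}};
  assert(CountIncoming(g, 0)[3] == 2);  // node 3 is a branch target twice over
}
```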
136 | 73 |
137 BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) { | 74 CodeGen::Addr CodeGen::Jump(Addr target) { |
138 // Iterate over all the instructions between "head" and "tail" and | 75 return Append(BPF_JMP | BPF_JA, Offset(target), 0, 0); |
139 // insert them into a new basic block. | |
140 BasicBlock* bb = new BasicBlock; | |
141 for (;; head = head->next) { | |
142 bb->instructions.push_back(head); | |
143 if (head == tail) { | |
144 break; | |
145 } | |
146 if (BPF_CLASS(head->code) == BPF_JMP) { | |
147 SANDBOX_DIE("Found a jump inside of a basic block"); | |
148 } | |
149 } | |
150 basic_blocks_.push_back(bb); | |
151 return bb; | |
152 } | 76 } |
153 | 77 |
154 void CodeGen::AddBasicBlock(Instruction* head, | 78 CodeGen::Addr CodeGen::Append(uint16_t code, |
155 Instruction* tail, | 79 uint32_t k, |
156 const BranchTargets& branch_targets, | 80 size_t jt, |
157 TargetsToBlocks* basic_blocks, | 81 size_t jf) { |
158 BasicBlock** firstBlock) { | 82 if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) { |
159 // Add a new basic block to "basic_blocks". Also set "firstBlock", if it | 83 CHECK_LT(jt, 256U); |
160 // has not been set before. | 84 CHECK_LT(jf, 256U); |
161 BranchTargets::const_iterator iter = branch_targets.find(head); | 85 } else { |
162 if ((iter == branch_targets.end()) != !*firstBlock || | 86 CHECK_EQ(0U, jt); |
163 !*firstBlock != basic_blocks->empty()) { | 87 CHECK_EQ(0U, jf); |
164 SANDBOX_DIE( | |
165 "Only the very first basic block should have no " | |
166 "incoming jumps"); | |
167 } | 88 } |
168 BasicBlock* bb = MakeBasicBlock(head, tail); | 89 |
169 if (!*firstBlock) { | 90 program_.push_back((sock_filter){code, jt, jf, k}); |
rickyz (no longer on Chrome), 2014/11/12 23:43:50:
Might be good to CHECK_LE(program_.size(), BPF_MAXINSNS) …
mdempsky, 2014/11/18 01:07:30:
Done. (I added a CHECK_LT before the push_back.)
| |
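For reference, BPF_MAXINSNS (4096 in linux/filter.h) is the kernel's cap on filter length. A guard along the lines this thread settles on, sketched here rather than quoted from the landed patch, might look like:

```cpp
#include <linux/filter.h>

#include <vector>

#include "base/logging.h"

// Sketch only: a length check before Append()'s push_back, in the spirit of
// the CHECK_LT mdempsky mentions; the landed code may differ in detail.
void CheckRoomForOneMore(const std::vector<sock_filter>& program) {
  CHECK_LT(program.size(), static_cast<size_t>(BPF_MAXINSNS));
}
```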
170 *firstBlock = bb; | 91 return program_.size() - 1; |
171 } | |
172 (*basic_blocks)[head] = bb; | |
173 return; | |
174 } | 92 } |
175 | 93 |
176 BasicBlock* CodeGen::CutGraphIntoBasicBlocks( | 94 void CodeGen::CheckValid(Addr addr) const { |
177 Instruction* instructions, | 95 CHECK_LT(addr, program_.size()); |
178 const BranchTargets& branch_targets, | |
179 TargetsToBlocks* basic_blocks) { | |
180 // Textbook implementation of a basic block generator. All basic blocks | |
181 // start with a branch target and end with either a return statement or | |
182 // a jump (or are followed by an instruction that forms the beginning of a | |
183 // new block). Both conditional and "always" jumps are supported. | |
184 BasicBlock* first_block = NULL; | |
185 std::set<const Instruction*> seen_instructions; | |
186 Instructions stack; | |
187 Instruction* tail = NULL; | |
188 Instruction* head = instructions; | |
189 for (Instruction* insn = head; insn;) { | |
190 if (seen_instructions.find(insn) != seen_instructions.end()) { | |
191 // We somehow went in a circle. This should never be possible. Not even | |
192 // cyclic graphs are supposed to confuse us this much. | |
193 SANDBOX_DIE("Internal compiler error; cannot compute basic blocks"); | |
194 } | |
195 seen_instructions.insert(insn); | |
196 if (tail && branch_targets.find(insn) != branch_targets.end()) { | |
197 // We reached a branch target. Start a new basic block (this means, | |
198 // flushing the previous basic block first). | |
199 AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block); | |
200 head = insn; | |
201 } | |
202 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
203 // We reached a jump instruction, this completes our current basic | |
204 // block. Flush it and continue by traversing both the true and the | |
205 // false branch of the jump. We need to maintain a stack to do so. | |
206 AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block); | |
207 if (BPF_OP(insn->code) != BPF_JA) { | |
208 stack.push_back(insn->jf_ptr); | |
209 } | |
210 insn = insn->jt_ptr; | |
211 | |
212 // If we are jumping to an instruction that we have previously | |
213 // processed, we are done with this branch. Continue by backtracking | |
214 // up the stack. | |
215 while (seen_instructions.find(insn) != seen_instructions.end()) { | |
216 backtracking: | |
217 if (stack.empty()) { | |
218 // We successfully traversed all reachable instructions. | |
219 return first_block; | |
220 } else { | |
221 // Going up the stack. | |
222 insn = stack.back(); | |
223 stack.pop_back(); | |
224 } | |
225 } | |
226 // Starting a new basic block. | |
227 tail = NULL; | |
228 head = insn; | |
229 } else { | |
230 // We found a non-jumping instruction, append it to current basic | |
231 // block. | |
232 tail = insn; | |
233 insn = insn->next; | |
234 if (!insn) { | |
235 // We reached a return statement, flush the current basic block and | |
236 // backtrack up the stack. | |
237 AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block); | |
238 goto backtracking; | |
239 } | |
240 } | |
241 } | |
242 return first_block; | |
243 } | 96 } |
244 | 97 |
245 // We define a comparator that inspects the sequence of instructions in our | 98 bool CodeGen::CacheKeyLess::operator()(const CacheKey& lhs, |
246 // basic block and any blocks referenced by this block. This function can be | 99 const CacheKey& rhs) const { |
247 // used in a "less" comparator for the purpose of storing pointers to basic | 100 if (lhs.a != rhs.a) |
248 // blocks in STL containers; this gives an easy option to use STL to find | 101 return lhs.a < rhs.a; |
249 // shared tail sequences of basic blocks. | 102 if (lhs.b != rhs.b) |
250 static int PointerCompare(const BasicBlock* block1, | 103 return lhs.b < rhs.b; |
251 const BasicBlock* block2, | 104 if (lhs.c != rhs.c) |
252 const TargetsToBlocks& blocks) { | 105 return lhs.c < rhs.c; |
253 // Return <0, 0, or >0 depending on the ordering of "block1" and "block2". | 106 if (lhs.d != rhs.d) |
254 // If we are looking at the exact same block, this is trivial and we don't | 107 return lhs.d < rhs.d; |
255 // need to do a full comparison. | 108 return false; |
256 if (block1 == block2) { | |
257 return 0; | |
258 } | |
259 | |
260 // We compare the sequence of instructions in both basic blocks. | |
261 const Instructions& insns1 = block1->instructions; | |
262 const Instructions& insns2 = block2->instructions; | |
263 // Basic blocks should never be empty. | |
264 CHECK(!insns1.empty()); | |
265 CHECK(!insns2.empty()); | |
266 | |
267 Instructions::const_iterator iter1 = insns1.begin(); | |
268 Instructions::const_iterator iter2 = insns2.begin(); | |
269 for (;; ++iter1, ++iter2) { | |
270 // If we have reached the end of the sequence of instructions in one or | |
271 // both basic blocks, we know the relative ordering between the two blocks | |
272 // and can return. | |
273 if (iter1 == insns1.end() || iter2 == insns2.end()) { | |
274 if (iter1 != insns1.end()) { | |
275 return 1; | |
276 } | |
277 if (iter2 != insns2.end()) { | |
278 return -1; | |
279 } | |
280 | |
281 // If the two blocks are the same length (and have elementwise-equal code | |
282 // and k fields) and their last instructions are neither a JMP nor a RET | |
283 // (which is the only way we can reach this point), then we must compare | |
284 // their successors. | |
285 Instruction* const insns1_last = insns1.back(); | |
286 Instruction* const insns2_last = insns2.back(); | |
287 CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP && | |
288 BPF_CLASS(insns1_last->code) != BPF_RET); | |
289 | |
290 // Non jumping instructions will always have a valid next instruction. | |
291 CHECK(insns1_last->next); | |
292 CHECK(insns2_last->next); | |
293 return PointerCompare(blocks.find(insns1_last->next)->second, | |
294 blocks.find(insns2_last->next)->second, | |
295 blocks); | |
296 } | |
297 | |
298 // Compare the individual fields for both instructions. | |
299 const Instruction& insn1 = **iter1; | |
300 const Instruction& insn2 = **iter2; | |
301 if (insn1.code != insn2.code) { | |
302 return insn1.code - insn2.code; | |
303 } | |
304 if (insn1.k != insn2.k) { | |
305 return insn1.k - insn2.k; | |
306 } | |
307 | |
308 // Sanity check: If we're looking at a JMP or RET instruction, by definition | |
309 // it should be the last instruction of the basic block. | |
310 if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) { | |
311 CHECK_EQ(insns1.back(), &insn1); | |
312 CHECK_EQ(insns2.back(), &insn2); | |
313 } | |
314 | |
315 // RET instructions terminate execution, and only JMP instructions use the | |
316 // jt_ptr and jf_ptr fields. Anything else can continue to the next | |
317 // instruction in the basic block. | |
318 if (BPF_CLASS(insn1.code) == BPF_RET) { | |
319 return 0; | |
320 } else if (BPF_CLASS(insn1.code) != BPF_JMP) { | |
321 continue; | |
322 } | |
323 | |
324 // Recursively compare the "true" and "false" branches. | |
325 // A well-formed BPF program can't have any cycles, so we know | |
326 // that our recursive algorithm will ultimately terminate. | |
327 // In the unlikely event that the programmer made a mistake and | |
328 // went out of the way to give us a cyclic program, we will crash | |
329 // with a stack overflow. We are OK with that. | |
330 if (BPF_OP(insn1.code) != BPF_JA) { | |
331 int c = PointerCompare(blocks.find(insn1.jf_ptr)->second, | |
332 blocks.find(insn2.jf_ptr)->second, | |
333 blocks); | |
334 if (c != 0) { | |
335 return c; | |
336 } | |
337 } | |
338 return PointerCompare(blocks.find(insn1.jt_ptr)->second, | |
339 blocks.find(insn2.jt_ptr)->second, | |
340 blocks); | |
341 } | |
342 } | |
343 | |
344 void CodeGen::MergeTails(TargetsToBlocks* blocks) { | |
345 // We enter all of our basic blocks into a set using the BasicBlock::Less() | |
346 // comparator. This naturally results in blocks with identical tails of | |
347 // instructions to map to the same entry in the set. Whenever we discover | |
348 // that a particular chain of instructions is already in the set, we merge | |
349 // the basic blocks and update the pointer in the "blocks" map. | |
350 // Returns the number of unique basic blocks. | |
351 // N.B. We don't merge instructions on a granularity that is finer than | |
352 // a basic block. In practice, this is sufficiently rare that we don't | |
353 // incur a big cost. | |
354 // Similarly, we currently don't merge anything other than tails. In | |
355 // the future, we might decide to revisit this decision and attempt to | |
356 // merge arbitrary sub-sequences of instructions. | |
357 BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare); | |
358 typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set; | |
359 Set seen_basic_blocks(less); | |
360 for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end(); | |
361 ++iter) { | |
362 BasicBlock* bb = iter->second; | |
363 Set::const_iterator entry = seen_basic_blocks.find(bb); | |
364 if (entry == seen_basic_blocks.end()) { | |
365 // This is the first time we see this particular sequence of | |
366 // instructions. Enter the basic block into the set of known | |
367 // basic blocks. | |
368 seen_basic_blocks.insert(bb); | |
369 } else { | |
370 // We have previously seen another basic block that defines the same | |
371 // sequence of instructions. Merge the two blocks and update the | |
372 // pointer in the "blocks" map. | |
373 iter->second = *entry; | |
374 } | |
375 } | |
376 } | |
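MergeTails leans on a standard-library property worth spelling out: a std::set with a custom comparator treats elements that compare equivalent as duplicates, and a failed insert hands back the surviving representative. A reduced illustration with strings standing in for instruction sequences:

```cpp
#include <cassert>
#include <set>
#include <string>

// Reduced sketch: a set whose comparator looks only at the trailing character
// of each string, so strings with the same "tail" collapse to one entry.
struct TailLess {
  bool operator()(const std::string& a, const std::string& b) const {
    return a.back() < b.back();
  }
};

int main() {
  std::set<std::string, TailLess> seen;
  seen.insert("ret0");           // first block with tail '0' wins
  auto r = seen.insert("jmp0");  // same tail: insert is a no-op...
  assert(!r.second);             // ...and r.first points at "ret0", the
  assert(*r.first == "ret0");    // representative a caller would reuse
}
```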
377 | |
378 void CodeGen::ComputeIncomingBranches(BasicBlock* block, | |
379 const TargetsToBlocks& targets_to_blocks, | |
380 IncomingBranches* incoming_branches) { | |
381 // We increment the number of incoming branches each time we encounter a | |
382 // basic block. But we only traverse recursively the very first time we | |
383 // encounter a new block. This is necessary to make topological sorting | |
384 // work correctly. | |
385 if (++(*incoming_branches)[block] == 1) { | |
386 Instruction* last_insn = block->instructions.back(); | |
387 if (BPF_CLASS(last_insn->code) == BPF_JMP) { | |
388 ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second, | |
389 targets_to_blocks, | |
390 incoming_branches); | |
391 if (BPF_OP(last_insn->code) != BPF_JA) { | |
392 ComputeIncomingBranches( | |
393 targets_to_blocks.find(last_insn->jf_ptr)->second, | |
394 targets_to_blocks, | |
395 incoming_branches); | |
396 } | |
397 } else if (BPF_CLASS(last_insn->code) != BPF_RET) { | |
398 ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second, | |
399 targets_to_blocks, | |
400 incoming_branches); | |
401 } | |
402 } | |
403 } | |
404 | |
405 void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block, | |
406 const TargetsToBlocks& blocks, | |
407 BasicBlocks* basic_blocks) { | |
408 // Textbook implementation of a toposort. We keep looking for basic blocks | |
409 // that don't have any incoming branches (initially, this is just the | |
410 // "first_block") and add them to the topologically sorted list of | |
411 // "basic_blocks". As we do so, we remove outgoing branches. This potentially | |
412 // ends up making our descendants eligible for the sorted list. The | |
413 // sorting algorithm terminates when there are no more basic blocks that have | |
414 // no incoming branches. If we didn't move all blocks from the set of | |
415 // "unordered_blocks" to the sorted list of "basic_blocks", there must have | |
416 // been a cyclic dependency. This should never happen in a BPF program, as | |
417 // well-formed BPF programs only ever have forward branches. | |
418 IncomingBranches unordered_blocks; | |
419 ComputeIncomingBranches(first_block, blocks, &unordered_blocks); | |
420 | |
421 std::set<BasicBlock*> heads; | |
422 for (;;) { | |
423 // Move block from "unordered_blocks" to "basic_blocks". | |
424 basic_blocks->push_back(first_block); | |
425 | |
426 // Inspect last instruction in the basic block. This is typically either a | |
427 // jump or a return statement. But it could also be a "normal" instruction | |
428 // that is followed by a jump target. | |
429 Instruction* last_insn = first_block->instructions.back(); | |
430 if (BPF_CLASS(last_insn->code) == BPF_JMP) { | |
431 // Remove outgoing branches. This might end up moving our descendants | |
432 // into set of "head" nodes that no longer have any incoming branches. | |
433 TargetsToBlocks::const_iterator iter; | |
434 if (BPF_OP(last_insn->code) != BPF_JA) { | |
435 iter = blocks.find(last_insn->jf_ptr); | |
436 if (!--unordered_blocks[iter->second]) { | |
437 heads.insert(iter->second); | |
438 } | |
439 } | |
440 iter = blocks.find(last_insn->jt_ptr); | |
441 if (!--unordered_blocks[iter->second]) { | |
442 first_block = iter->second; | |
443 continue; | |
444 } | |
445 } else if (BPF_CLASS(last_insn->code) != BPF_RET) { | |
446 // We encountered an instruction that doesn't change code flow. Try to | |
447 // pick the next "first_block" from "last_insn->next", if possible. | |
448 TargetsToBlocks::const_iterator iter; | |
449 iter = blocks.find(last_insn->next); | |
450 if (!--unordered_blocks[iter->second]) { | |
451 first_block = iter->second; | |
452 continue; | |
453 } else { | |
454 // Our basic block is supposed to be followed by "last_insn->next", | |
455 // but dependencies prevent this from happening. Insert a BPF_JA | |
456 // instruction to correct the code flow. | |
457 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next); | |
458 first_block->instructions.push_back(ja); | |
459 last_insn->next = ja; | |
460 } | |
461 } | |
462 if (heads.empty()) { | |
463 if (unordered_blocks.size() != basic_blocks->size()) { | |
464 SANDBOX_DIE("Internal compiler error; cyclic graph detected"); | |
465 } | |
466 return; | |
467 } | |
468 // Proceed by picking an arbitrary node from the set of basic blocks that | |
469 // do not have any incoming branches. | |
470 first_block = *heads.begin(); | |
471 heads.erase(heads.begin()); | |
472 } | |
473 } | |
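The toposort deleted above is essentially Kahn's algorithm: count incoming edges, repeatedly emit a node whose count is zero, and decrement its successors. A compact generic version for comparison (node type and graph are invented for the example):

```cpp
#include <cassert>
#include <map>
#include <vector>

// Kahn's algorithm over a tiny adjacency list; leftover nonzero counts would
// indicate a cycle, which well-formed BPF (forward branches only) never has.
std::vector<int> TopoSort(const std::map<int, std::vector<int>>& succ) {
  std::map<int, int> incoming;
  for (const auto& n : succ)
    for (int s : n.second) ++incoming[s];
  std::vector<int> ready, order;
  for (const auto& n : succ)
    if (incoming[n.first] == 0) ready.push_back(n.first);
  while (!ready.empty()) {
    int n = ready.back();
    ready.pop_back();
    order.push_back(n);
    for (int s : succ.at(n))
      if (--incoming[s] == 0) ready.push_back(s);
  }
  return order;
}

int main() {
  // 0 -> {1, 2}, 1 -> {2}, 2 -> {}.
  std::map<int, std::vector<int>> g = {{0, {1, 2}}, {1, {2}}, {2, {}}};
  std::vector<int> order = TopoSort(g);
  assert(order.size() == 3 && order[0] == 0 && order.back() == 2);
}
```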
474 | |
475 void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks, | |
476 const TargetsToBlocks& targets_to_blocks) { | |
477 // While we previously used pointers in jt_ptr and jf_ptr to link jump | |
478 // instructions to their targets, we now convert these jumps to relative | |
479 // jumps that are suitable for loading the BPF program into the kernel. | |
480 int offset = 0; | |
481 | |
482 // Since we just completed a toposort, all jump targets are guaranteed to | |
483 // go forward. This means, iterating over the basic blocks in reverse makes | |
484 // it trivial to compute the correct offsets. | |
485 BasicBlock* bb = NULL; | |
486 BasicBlock* last_bb = NULL; | |
487 for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin(); | |
488 iter != basic_blocks->rend(); | |
489 ++iter) { | |
490 last_bb = bb; | |
491 bb = *iter; | |
492 Instruction* insn = bb->instructions.back(); | |
493 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
494 // Basic block ended in a jump instruction. We can now compute the | |
495 // appropriate offsets. | |
496 if (BPF_OP(insn->code) == BPF_JA) { | |
497 // "Always" jumps use the 32bit "k" field for the offset, instead | |
498 // of the 8bit "jt" and "jf" fields. | |
499 int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset; | |
500 insn->k = jmp; | |
501 insn->jt = insn->jf = 0; | |
502 } else { | |
503 // The offset computations for conditional jumps are just the same | |
504 // as for "always" jumps. | |
505 int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset; | |
506 int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset; | |
507 | |
508 // There is an added complication, because conditional relative jumps | |
509 // can only jump at most 255 instructions forward. If we have to jump | |
510 // further, insert an extra "always" jump. | |
511 Instructions::size_type jmp = bb->instructions.size(); | |
512 if (jt > 255 || (jt == 255 && jf > 255)) { | |
513 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr); | |
514 bb->instructions.push_back(ja); | |
515 ja->k = jt; | |
516 ja->jt = ja->jf = 0; | |
517 | |
518 // The newly inserted "always" jump, of course, requires us to adjust | |
519 // the jump targets in the original conditional jump. | |
520 jt = 0; | |
521 ++jf; | |
522 } | |
523 if (jf > 255) { | |
524 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr); | |
525 bb->instructions.insert(bb->instructions.begin() + jmp, ja); | |
526 ja->k = jf; | |
527 ja->jt = ja->jf = 0; | |
528 | |
529 // Again, we have to adjust the jump targets in the original | |
530 // conditional jump. | |
531 ++jt; | |
532 jf = 0; | |
533 } | |
534 | |
535 // Now we can finally set the relative jump targets in the conditional | |
536 // jump instruction. Afterwards, we must no longer access the jt_ptr | |
537 // and jf_ptr fields. | |
538 insn->jt = jt; | |
539 insn->jf = jf; | |
540 } | |
541 } else if (BPF_CLASS(insn->code) != BPF_RET && | |
542 targets_to_blocks.find(insn->next)->second != last_bb) { | |
543 SANDBOX_DIE("Internal compiler error; invalid basic block encountered"); | |
544 } | |
545 | |
546 // Proceed to next basic block. | |
547 offset += bb->instructions.size(); | |
548 bb->offset = offset; | |
549 } | |
550 return; | |
551 } | |
552 | |
553 void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks, | |
554 Program* program) { | |
555 // Our basic blocks have been sorted and relative jump offsets have been | |
556 // computed. The last remaining step is for all the instructions in our | |
557 // basic blocks to be concatenated into a BPF program. | |
558 program->clear(); | |
559 for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin(); | |
560 bb_iter != basic_blocks.end(); | |
561 ++bb_iter) { | |
562 const BasicBlock& bb = **bb_iter; | |
563 for (Instructions::const_iterator insn_iter = bb.instructions.begin(); | |
564 insn_iter != bb.instructions.end(); | |
565 ++insn_iter) { | |
566 const Instruction& insn = **insn_iter; | |
567 program->push_back( | |
568 (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k}); | |
569 } | |
570 } | |
571 return; | |
572 } | |
573 | |
574 void CodeGen::Compile(Instruction* instructions, Program* program) { | |
575 if (compiled_) { | |
576 SANDBOX_DIE( | |
577 "Cannot call Compile() multiple times. Create a new code " | |
578 "generator instead"); | |
579 } | |
580 compiled_ = true; | |
581 | |
582 BranchTargets branch_targets; | |
583 FindBranchTargets(*instructions, &branch_targets); | |
584 TargetsToBlocks all_blocks; | |
585 BasicBlock* first_block = | |
586 CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks); | |
587 MergeTails(&all_blocks); | |
588 BasicBlocks basic_blocks; | |
589 TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks); | |
590 ComputeRelativeJumps(&basic_blocks, all_blocks); | |
591 ConcatenateBasicBlocks(basic_blocks, program); | |
592 return; | |
593 } | 109 } |
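A small observation on the new CacheKeyLess: the hand-rolled chain of comparisons is the standard lexicographic pattern, and std::tie yields the same strict weak ordering in one line. A sketch assuming the same four members:

```cpp
#include <tuple>

// Equivalent ordering via std::tie (sketch; field names a/b/c/d as above).
struct Key {
  unsigned a, b, c, d;
};

bool Less(const Key& lhs, const Key& rhs) {
  return std::tie(lhs.a, lhs.b, lhs.c, lhs.d) <
         std::tie(rhs.a, rhs.b, rhs.c, rhs.d);
}
```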
594 | 110 |
595 } // namespace sandbox | 111 } // namespace sandbox |