OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "sandbox/linux/seccomp-bpf/codegen.h" | 5 #include "sandbox/linux/seccomp-bpf/codegen.h" |
6 | 6 |
7 #include <linux/filter.h> | 7 #include <linux/filter.h> |
8 | 8 |
9 #include <set> | 9 #include <limits> |
| 10 #include <utility> |
10 | 11 |
11 #include "base/logging.h" | 12 #include "base/logging.h" |
12 #include "sandbox/linux/seccomp-bpf/basicblock.h" | 13 |
13 #include "sandbox/linux/seccomp-bpf/die.h" | 14 // This CodeGen implementation strives for simplicity while still |
14 #include "sandbox/linux/seccomp-bpf/instruction.h" | 15 // generating acceptable BPF programs under typical usage patterns |
| 16 // (e.g., by PolicyCompiler). |
| 17 // |
| 18 // The key to its simplicity is that BPF programs only support forward |
| 19 // jumps/branches, which allows constraining the DAG construction API |
| 20 // to make instruction nodes immutable. Immutable nodes admit a |
| 21 // simple greedy approach of emitting new instructions as needed and |
| 22 // then reusing existing ones that have already been emitted. This |
| 23 // cleanly avoids any need to compute basic blocks or apply |
| 24 // topological sorting because the API effectively sorts instructions |
| 25 // for us (e.g., before MakeInstruction() can be called to emit a |
| 26 // branch instruction, it must have already been called for each |
| 27 // branch path). |
| 28 // |
| 29 // This greedy algorithm is not without (theoretical) weakness though: |
| 30 // |
| 31 // 1. In the general case, we don't eliminate dead code. If needed, |
| 32 // we could trace back through the program in Compile() and elide |
| 33 // any unneeded instructions, but in practice we only emit live |
| 34 // instructions anyway. |
| 35 // |
| 36 // 2. By not dividing instructions into basic blocks and sorting, we |
| 37 // lose an opportunity to move non-branch/non-return instructions |
| 38 // adjacent to their successor instructions, which means we might |
| 39 // need to emit additional jumps. But in practice, they'll |
| 40 // already be nearby as long as callers don't go out of their way |
| 41 // to interleave MakeInstruction() calls for unrelated code |
| 42 // sequences. |
15 | 43 |
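A minimal usage sketch of the API described above (not part of this change; the helper name, the chosen syscall, and the return values are illustrative assumptions, and it presumes the declarations in codegen.h plus <linux/filter.h>, <linux/seccomp.h>, <stddef.h>, and <sys/syscall.h>). Leaf nodes are emitted first, and a branch can only be requested once both of its targets exist, which is what keeps every jump forward-only:

  using sandbox::CodeGen;

  CodeGen::Program BuildToyFilter() {
    CodeGen gen;
    // Return instructions are leaves, so they are emitted first.
    CodeGen::Node kill = gen.MakeInstruction(
        BPF_RET + BPF_K, SECCOMP_RET_KILL, CodeGen::kNullNode, CodeGen::kNullNode);
    CodeGen::Node allow = gen.MakeInstruction(
        BPF_RET + BPF_K, SECCOMP_RET_ALLOW, CodeGen::kNullNode, CodeGen::kNullNode);
    // A conditional branch supplies both targets, which must already exist.
    CodeGen::Node check = gen.MakeInstruction(
        BPF_JMP + BPF_JEQ + BPF_K, __NR_getpid, allow, kill);
    // Non-branch instructions fall through to |jt|; |jf| stays kNullNode.
    CodeGen::Node head = gen.MakeInstruction(
        BPF_LD + BPF_W + BPF_ABS, offsetof(struct seccomp_data, nr),
        check, CodeGen::kNullNode);
    CodeGen::Program program;
    gen.Compile(head, &program);  // |program| now begins at |head|.
    return program;
  }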
16 namespace sandbox { | 44 namespace sandbox { |
17 | 45 |
18 // Unfortunately this needs to be defined out-of-line because inline | 46 // kBranchRange is the maximum value that can be stored in |
19 // initializing a static member to "nullptr" requires "constexpr", | 47 // sock_filter's 8-bit jt and jf fields. |
20 // which is currently banned by the Chromium style guide. | 48 const size_t kBranchRange = std::numeric_limits<uint8_t>::max(); |
21 const CodeGen::Node CodeGen::kNullNode = nullptr; | |
22 | 49 |
23 CodeGen::CodeGen() : compiled_(false) {} | 50 const CodeGen::Node CodeGen::kNullNode; |
| 51 |
| 52 CodeGen::CodeGen() : program_(), memos_() { |
| 53 } |
24 | 54 |
25 CodeGen::~CodeGen() { | 55 CodeGen::~CodeGen() { |
26 for (Instructions::iterator iter = instructions_.begin(); | 56 } |
27 iter != instructions_.end(); | 57 |
28 ++iter) { | 58 void CodeGen::Compile(CodeGen::Node head, Program* out) { |
29 delete *iter; | 59 DCHECK(out); |
30 } | 60 out->assign(program_.rbegin() + Offset(head), program_.rend()); |
31 for (BasicBlocks::iterator iter = basic_blocks_.begin(); | |
32 iter != basic_blocks_.end(); | |
33 ++iter) { | |
34 delete *iter; | |
35 } | |
36 } | 61 } |
37 | 62 |
38 CodeGen::Node CodeGen::MakeInstruction(uint16_t code, | 63 CodeGen::Node CodeGen::MakeInstruction(uint16_t code, |
39 uint32_t k, | 64 uint32_t k, |
40 Node jt, | 65 Node jt, |
41 Node jf) { | 66 Node jf) { |
42 Node insn; | 67 // To avoid generating redundant code sequences, we memoize the |
43 if (BPF_CLASS(code) == BPF_JMP) { | 68 // results from AppendInstruction(). |
44 CHECK_NE(kNullNode, jt); | 69 auto res = memos_.insert(std::make_pair(MemoKey(code, k, jt, jf), kNullNode)); |
45 if (BPF_OP(code) == BPF_JA) { | 70 CodeGen::Node* node = &res.first->second; |
46 CHECK_EQ(kNullNode, jf); | 71 if (res.second) { // Newly inserted memo entry. |
47 } else { | 72 *node = AppendInstruction(code, k, jt, jf); |
48 CHECK_NE(kNullNode, jf); | |
49 } | |
50 insn = new Instruction(code, k, jt, jf); | |
51 } else { | |
52 if (BPF_CLASS(code) == BPF_RET) { | |
53 CHECK_EQ(kNullNode, jt); | |
54 } else { | |
55 CHECK_NE(kNullNode, jt); | |
56 } | |
57 CHECK_EQ(kNullNode, jf); | |
58 insn = new Instruction(code, k, jt); | |
59 } | 73 } |
60 instructions_.push_back(insn); | 74 return *node; |
61 return insn; | |
62 } | 75 } |
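For illustration, the common-subexpression sharing this memo table provides (reusing the hypothetical |gen| from the sketch above): two structurally identical requests collapse into a single emitted instruction.

  // Both calls describe the same (code, k, jt, jf) tuple, so only one
  // BPF_RET instruction is appended and the same Node value is returned.
  CodeGen::Node r1 = gen.MakeInstruction(
      BPF_RET + BPF_K, SECCOMP_RET_ALLOW, CodeGen::kNullNode, CodeGen::kNullNode);
  CodeGen::Node r2 = gen.MakeInstruction(
      BPF_RET + BPF_K, SECCOMP_RET_ALLOW, CodeGen::kNullNode, CodeGen::kNullNode);
  // r1 == r2; any later branches to either share the same target.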
63 | 76 |
64 void CodeGen::FindBranchTargets(const Instruction& instructions, | 77 CodeGen::Node CodeGen::AppendInstruction(uint16_t code, |
65 BranchTargets* branch_targets) { | 78 uint32_t k, |
66 // Follow all possible paths through the "instructions" graph and compute | 79 Node jt, |
67 // a list of branch targets. This will later be needed to compute the | 80 Node jf) { |
68 // boundaries of basic blocks. | 81 if (BPF_CLASS(code) == BPF_JMP) { |
69 // We maintain a set of all instructions that we have previously seen. This | 82 CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed"; |
70 // set ultimately converges on all instructions in the program. | 83 |
71 std::set<const Instruction*> seen_instructions; | 84 // We need to check |jt| twice because it might get pushed |
72 Instructions stack; | 85 // out-of-range by appending a jump for |jf|. |
73 for (const Instruction* insn = &instructions; insn;) { | 86 jt = WithinRange(jt, kBranchRange); |
74 seen_instructions.insert(insn); | 87 jf = WithinRange(jf, kBranchRange); |
75 if (BPF_CLASS(insn->code) == BPF_JMP) { | 88 jt = WithinRange(jt, kBranchRange); |
76 // Found a jump. Increase count of incoming edges for each of the jump | 89 return Append(code, k, Offset(jt), Offset(jf)); |
77 // targets. | |
78 ++(*branch_targets)[insn->jt_ptr]; | |
79 if (BPF_OP(insn->code) != BPF_JA) { | |
80 ++(*branch_targets)[insn->jf_ptr]; | |
81 stack.push_back(const_cast<Instruction*>(insn)); | |
82 } | |
83 // Start a recursive descent for depth-first traversal. |
84 if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) { | |
85 // We haven't seen the "true" branch yet. Traverse it now. We have | |
86 // already remembered the "false" branch on the stack and will | |
87 // traverse it later. | |
88 insn = insn->jt_ptr; | |
89 continue; | |
90 } else { | |
91 // Now try traversing the "false" branch. | |
92 insn = NULL; | |
93 } | |
94 } else { | |
95 // This is a non-jump instruction, just continue to the next instruction | |
96 // (if any). It's OK if "insn" becomes NULL when reaching a return | |
97 // instruction. | |
98 if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) { | |
99 SANDBOX_DIE( | |
100 "Internal compiler error; return instruction must be at " | |
101 "the end of the BPF program"); | |
102 } | |
103 if (seen_instructions.find(insn->next) == seen_instructions.end()) { | |
104 insn = insn->next; | |
105 } else { | |
106 // We have seen this instruction before. That could happen if it is | |
107 // a branch target. No need to continue processing. | |
108 insn = NULL; | |
109 } | |
110 } | |
111 while (!insn && !stack.empty()) { | |
112 // We are done processing all the way to a leaf node, backtrack up the | |
113 // stack to any branches that we haven't processed yet. By definition, | |
114 // this has to be a "false" branch, as we always process the "true" | |
115 // branches right away. | |
116 insn = stack.back(); | |
117 stack.pop_back(); | |
118 if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) { | |
119 // We haven't seen the "false" branch yet. So, that's where we'll | |
120 // go now. | |
121 insn = insn->jf_ptr; | |
122 } else { | |
123 // We have seen both the "true" and the "false" branch, continue | |
124 // up the stack. | |
125 if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) { | |
126 SANDBOX_DIE( | |
127 "Internal compiler error; cannot find all " | |
128 "branch targets"); | |
129 } | |
130 insn = NULL; | |
131 } | |
132 } | |
133 } | 90 } |
134 return; | 91 |
| 92 CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf"; |
| 93 if (BPF_CLASS(code) == BPF_RET) { |
| 94 CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt"; |
| 95 } else { |
| 96 // For non-branch/non-return instructions, execution always |
| 97 // proceeds to the next instruction; so we need to arrange for |
| 98 // that to be |jt|. |
| 99 jt = WithinRange(jt, 0); |
| 100 } |
| 101 return Append(code, k, 0, 0); |
135 } | 102 } |
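To make the double range check above concrete, a worked example with assumed offsets (an Offset() value is the distance back to the target, measured from the most recently emitted instruction, and kBranchRange is 255): suppose a conditional jump is requested with Offset(jt) == 255 and Offset(jf) == 300. The first WithinRange(jt, kBranchRange) leaves jt alone, but WithinRange(jf, kBranchRange) must append a BPF_JA, which pushes jt's offset to 256; the second WithinRange(jt, kBranchRange) then appends another BPF_JA so that both fields fit in eight bits.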
136 | 103 |
137 BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) { | 104 CodeGen::Node CodeGen::WithinRange(Node target, size_t range) { |
138 // Iterate over all the instructions between "head" and "tail" and | 105 if (Offset(target) > range) { |
139 // insert them into a new basic block. | 106 // TODO(mdempsky): If |range > 0|, we might be able to reuse an |
140 BasicBlock* bb = new BasicBlock; | 107 // existing instruction within that range. |
141 for (;; head = head->next) { | 108 |
142 bb->instructions.push_back(head); | 109 // TODO(mdempsky): If |target| is a branch or return, it might be |
143 if (head == tail) { | 110 // possible to duplicate that instruction rather than jump to it. |
144 break; | 111 |
145 } | 112 // Fall back to emitting a jump instruction. |
146 if (BPF_CLASS(head->code) == BPF_JMP) { | 113 target = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0); |
147 SANDBOX_DIE("Found a jump inside of a basic block"); | |
148 } | |
149 } | 114 } |
150 basic_blocks_.push_back(bb); | 115 |
151 return bb; | 116 CHECK_LE(Offset(target), range) << "ICE: Failed to bring target within range"; |
| 117 return target; |
152 } | 118 } |
153 | 119 |
154 void CodeGen::AddBasicBlock(Instruction* head, | 120 CodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) { |
155 Instruction* tail, | 121 if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) { |
156 const BranchTargets& branch_targets, | 122 CHECK_LE(jt, kBranchRange); |
157 TargetsToBlocks* basic_blocks, | 123 CHECK_LE(jf, kBranchRange); |
158 BasicBlock** firstBlock) { | 124 } else { |
159 // Add a new basic block to "basic_blocks". Also set "firstBlock", if it | 125 CHECK_EQ(0U, jt); |
160 // has not been set before. | 126 CHECK_EQ(0U, jf); |
161 BranchTargets::const_iterator iter = branch_targets.find(head); | |
162 if ((iter == branch_targets.end()) != !*firstBlock || | |
163 !*firstBlock != basic_blocks->empty()) { | |
164 SANDBOX_DIE( | |
165 "Only the very first basic block should have no " | |
166 "incoming jumps"); | |
167 } | 127 } |
168 BasicBlock* bb = MakeBasicBlock(head, tail); | 128 |
169 if (!*firstBlock) { | 129 CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS)); |
170 *firstBlock = bb; | 130 program_.push_back(sock_filter{code, jt, jf, k}); |
171 } | 131 return program_.size() - 1; |
172 (*basic_blocks)[head] = bb; | |
173 return; | |
174 } | 132 } |
175 | 133 |
176 BasicBlock* CodeGen::CutGraphIntoBasicBlocks( | 134 size_t CodeGen::Offset(Node target) const { |
177 Instruction* instructions, | 135 CHECK_LT(target, program_.size()) << "Bogus offset target node"; |
178 const BranchTargets& branch_targets, | 136 return (program_.size() - 1) - target; |
179 TargetsToBlocks* basic_blocks) { | |
180 // Textbook implementation of a basic block generator. All basic blocks | |
181 // start with a branch target and end with either a return statement or | |
182 // a jump (or are followed by an instruction that forms the beginning of a | |
183 // new block). Both conditional and "always" jumps are supported. | |
184 BasicBlock* first_block = NULL; | |
185 std::set<const Instruction*> seen_instructions; | |
186 Instructions stack; | |
187 Instruction* tail = NULL; | |
188 Instruction* head = instructions; | |
189 for (Instruction* insn = head; insn;) { | |
190 if (seen_instructions.find(insn) != seen_instructions.end()) { | |
191 // We somehow went in a circle. This should never be possible. Not even | |
192 // cyclic graphs are supposed to confuse us this much. | |
193 SANDBOX_DIE("Internal compiler error; cannot compute basic blocks"); | |
194 } | |
195 seen_instructions.insert(insn); | |
196 if (tail && branch_targets.find(insn) != branch_targets.end()) { | |
197 // We reached a branch target. Start a new basic block (this means, | |
198 // flushing the previous basic block first). | |
199 AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block); | |
200 head = insn; | |
201 } | |
202 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
203 // We reached a jump instruction, this completes our current basic | |
204 // block. Flush it and continue by traversing both the true and the | |
205 // false branch of the jump. We need to maintain a stack to do so. | |
206 AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block); | |
207 if (BPF_OP(insn->code) != BPF_JA) { | |
208 stack.push_back(insn->jf_ptr); | |
209 } | |
210 insn = insn->jt_ptr; | |
211 | |
212 // If we are jumping to an instruction that we have previously | |
213 // processed, we are done with this branch. Continue by backtracking | |
214 // up the stack. | |
215 while (seen_instructions.find(insn) != seen_instructions.end()) { | |
216 backtracking: | |
217 if (stack.empty()) { | |
218 // We successfully traversed all reachable instructions. | |
219 return first_block; | |
220 } else { | |
221 // Going up the stack. | |
222 insn = stack.back(); | |
223 stack.pop_back(); | |
224 } | |
225 } | |
226 // Starting a new basic block. | |
227 tail = NULL; | |
228 head = insn; | |
229 } else { | |
230 // We found a non-jumping instruction, append it to current basic | |
231 // block. | |
232 tail = insn; | |
233 insn = insn->next; | |
234 if (!insn) { | |
235 // We reached a return statement, flush the current basic block and | |
236 // backtrack up the stack. | |
237 AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block); | |
238 goto backtracking; | |
239 } | |
240 } | |
241 } | |
242 return first_block; | |
243 } | 137 } |
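Because Append() only ever pushes to the back of program_, the buffer holds the program back-to-front: the first node emitted (necessarily a return, since every other instruction needs an existing |jt|) ends up as the last instruction, and Offset() gives the forward distance a jump needs once the order is reversed. Compile() therefore just copies from program_.rbegin() + Offset(head) through program_.rend(). In the hypothetical four-node sketch near the top of this file, program_ ends up as [kill, allow, jeq, load] at indices 0 through 3; reversing from head (index 3, so Offset(head) == 0) yields load, jeq, allow, kill, and the branch's jt/jf offsets of 0 and 1 select allow and kill respectively.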
244 | 138 |
245 // We define a comparator that inspects the sequence of instructions in our | 139 // TODO(mdempsky): Move into a general base::Tuple helper library. |
246 // basic block and any blocks referenced by this block. This function can be | 140 bool CodeGen::MemoKeyLess::operator()(const MemoKey& lhs, |
247 // used in a "less" comparator for the purpose of storing pointers to basic | 141 const MemoKey& rhs) const { |
248 // blocks in STL containers; this gives an easy option to use STL to find | 142 if (lhs.a != rhs.a) |
249 // shared tail sequences of basic blocks. | 143 return lhs.a < rhs.a; |
250 static int PointerCompare(const BasicBlock* block1, | 144 if (lhs.b != rhs.b) |
251 const BasicBlock* block2, | 145 return lhs.b < rhs.b; |
252 const TargetsToBlocks& blocks) { | 146 if (lhs.c != rhs.c) |
253 // Return <0, 0, or >0 depending on the ordering of "block1" and "block2". | 147 return lhs.c < rhs.c; |
254 // If we are looking at the exact same block, this is trivial and we don't | 148 if (lhs.d != rhs.d) |
255 // need to do a full comparison. | 149 return lhs.d < rhs.d; |
256 if (block1 == block2) { | 150 return false; |
257 return 0; | |
258 } | |
259 | |
260 // We compare the sequence of instructions in both basic blocks. | |
261 const Instructions& insns1 = block1->instructions; | |
262 const Instructions& insns2 = block2->instructions; | |
263 // Basic blocks should never be empty. | |
264 CHECK(!insns1.empty()); | |
265 CHECK(!insns2.empty()); | |
266 | |
267 Instructions::const_iterator iter1 = insns1.begin(); | |
268 Instructions::const_iterator iter2 = insns2.begin(); | |
269 for (;; ++iter1, ++iter2) { | |
270 // If we have reached the end of the sequence of instructions in one or | |
271 // both basic blocks, we know the relative ordering between the two blocks | |
272 // and can return. | |
273 if (iter1 == insns1.end() || iter2 == insns2.end()) { | |
274 if (iter1 != insns1.end()) { | |
275 return 1; | |
276 } | |
277 if (iter2 != insns2.end()) { | |
278 return -1; | |
279 } | |
280 | |
281 // If the two blocks are the same length (and have elementwise-equal code | |
282 // and k fields) and their last instructions are neither a JMP nor a RET | |
283 // (which is the only way we can reach this point), then we must compare | |
284 // their successors. | |
285 Instruction* const insns1_last = insns1.back(); | |
286 Instruction* const insns2_last = insns2.back(); | |
287 CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP && | |
288 BPF_CLASS(insns1_last->code) != BPF_RET); | |
289 | |
290 // Non jumping instructions will always have a valid next instruction. | |
291 CHECK(insns1_last->next); | |
292 CHECK(insns2_last->next); | |
293 return PointerCompare(blocks.find(insns1_last->next)->second, | |
294 blocks.find(insns2_last->next)->second, | |
295 blocks); | |
296 } | |
297 | |
298 // Compare the individual fields for both instructions. | |
299 const Instruction& insn1 = **iter1; | |
300 const Instruction& insn2 = **iter2; | |
301 if (insn1.code != insn2.code) { | |
302 return insn1.code - insn2.code; | |
303 } | |
304 if (insn1.k != insn2.k) { | |
305 return insn1.k - insn2.k; | |
306 } | |
307 | |
308 // Sanity check: If we're looking at a JMP or RET instruction, by definition | |
309 // it should be the last instruction of the basic block. | |
310 if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) { | |
311 CHECK_EQ(insns1.back(), &insn1); | |
312 CHECK_EQ(insns2.back(), &insn2); | |
313 } | |
314 | |
315 // RET instructions terminate execution, and only JMP instructions use the | |
316 // jt_ptr and jf_ptr fields. Anything else can continue to the next | |
317 // instruction in the basic block. | |
318 if (BPF_CLASS(insn1.code) == BPF_RET) { | |
319 return 0; | |
320 } else if (BPF_CLASS(insn1.code) != BPF_JMP) { | |
321 continue; | |
322 } | |
323 | |
324 // Recursively compare the "true" and "false" branches. | |
325 // A well-formed BPF program can't have any cycles, so we know | |
326 // that our recursive algorithm will ultimately terminate. | |
327 // In the unlikely event that the programmer made a mistake and | |
328 // went out of the way to give us a cyclic program, we will crash | |
329 // with a stack overflow. We are OK with that. | |
330 if (BPF_OP(insn1.code) != BPF_JA) { | |
331 int c = PointerCompare(blocks.find(insn1.jf_ptr)->second, | |
332 blocks.find(insn2.jf_ptr)->second, | |
333 blocks); | |
334 if (c != 0) { | |
335 return c; | |
336 } | |
337 } | |
338 return PointerCompare(blocks.find(insn1.jt_ptr)->second, | |
339 blocks.find(insn2.jt_ptr)->second, | |
340 blocks); | |
341 } | |
342 } | |
343 | |
344 void CodeGen::MergeTails(TargetsToBlocks* blocks) { | |
345 // We enter all of our basic blocks into a set using the BasicBlock::Less() | |
346 // comparator. This naturally results in blocks with identical tails of | |
347 // instructions to map to the same entry in the set. Whenever we discover | |
348 // that a particular chain of instructions is already in the set, we merge | |
349 // the basic blocks and update the pointer in the "blocks" map. | |
350 // Returns the number of unique basic blocks. | |
351 // N.B. We don't merge instructions on a granularity that is finer than | |
352 // a basic block. In practice, this is sufficiently rare that we don't | |
353 // incur a big cost. | |
354 // Similarly, we currently don't merge anything other than tails. In | |
355 // the future, we might decide to revisit this decision and attempt to | |
356 // merge arbitrary sub-sequences of instructions. | |
357 BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare); | |
358 typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set; | |
359 Set seen_basic_blocks(less); | |
360 for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end(); | |
361 ++iter) { | |
362 BasicBlock* bb = iter->second; | |
363 Set::const_iterator entry = seen_basic_blocks.find(bb); | |
364 if (entry == seen_basic_blocks.end()) { | |
365 // This is the first time we see this particular sequence of | |
366 // instructions. Enter the basic block into the set of known | |
367 // basic blocks. | |
368 seen_basic_blocks.insert(bb); | |
369 } else { | |
370 // We have previously seen another basic block that defines the same | |
371 // sequence of instructions. Merge the two blocks and update the | |
372 // pointer in the "blocks" map. | |
373 iter->second = *entry; | |
374 } | |
375 } | |
376 } | |
377 | |
378 void CodeGen::ComputeIncomingBranches(BasicBlock* block, | |
379 const TargetsToBlocks& targets_to_blocks, | |
380 IncomingBranches* incoming_branches) { | |
381 // We increment the number of incoming branches each time we encounter a | |
382 // basic block. But we only traverse recursively the very first time we | |
383 // encounter a new block. This is necessary to make topological sorting | |
384 // work correctly. | |
385 if (++(*incoming_branches)[block] == 1) { | |
386 Instruction* last_insn = block->instructions.back(); | |
387 if (BPF_CLASS(last_insn->code) == BPF_JMP) { | |
388 ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second, | |
389 targets_to_blocks, | |
390 incoming_branches); | |
391 if (BPF_OP(last_insn->code) != BPF_JA) { | |
392 ComputeIncomingBranches( | |
393 targets_to_blocks.find(last_insn->jf_ptr)->second, | |
394 targets_to_blocks, | |
395 incoming_branches); | |
396 } | |
397 } else if (BPF_CLASS(last_insn->code) != BPF_RET) { | |
398 ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second, | |
399 targets_to_blocks, | |
400 incoming_branches); | |
401 } | |
402 } | |
403 } | |
404 | |
405 void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block, | |
406 const TargetsToBlocks& blocks, | |
407 BasicBlocks* basic_blocks) { | |
408 // Textbook implementation of a toposort. We keep looking for basic blocks | |
409 // that don't have any incoming branches (initially, this is just the | |
410 // "first_block") and add them to the topologically sorted list of | |
411 // "basic_blocks". As we do so, we remove outgoing branches. This potentially | |
412 // ends up making our descendants eligible for the sorted list. The | |
413 // sorting algorithm terminates when there are no more basic blocks that have | |
414 // no incoming branches. If we didn't move all blocks from the set of | |
415 // "unordered_blocks" to the sorted list of "basic_blocks", there must have | |
416 // been a cyclic dependency. This should never happen in a BPF program, as | |
417 // well-formed BPF programs only ever have forward branches. | |
418 IncomingBranches unordered_blocks; | |
419 ComputeIncomingBranches(first_block, blocks, &unordered_blocks); | |
420 | |
421 std::set<BasicBlock*> heads; | |
422 for (;;) { | |
423 // Move block from "unordered_blocks" to "basic_blocks". | |
424 basic_blocks->push_back(first_block); | |
425 | |
426 // Inspect last instruction in the basic block. This is typically either a | |
427 // jump or a return statement. But it could also be a "normal" instruction | |
428 // that is followed by a jump target. | |
429 Instruction* last_insn = first_block->instructions.back(); | |
430 if (BPF_CLASS(last_insn->code) == BPF_JMP) { | |
431 // Remove outgoing branches. This might end up moving our descendants | |
432 // into set of "head" nodes that no longer have any incoming branches. | |
433 TargetsToBlocks::const_iterator iter; | |
434 if (BPF_OP(last_insn->code) != BPF_JA) { | |
435 iter = blocks.find(last_insn->jf_ptr); | |
436 if (!--unordered_blocks[iter->second]) { | |
437 heads.insert(iter->second); | |
438 } | |
439 } | |
440 iter = blocks.find(last_insn->jt_ptr); | |
441 if (!--unordered_blocks[iter->second]) { | |
442 first_block = iter->second; | |
443 continue; | |
444 } | |
445 } else if (BPF_CLASS(last_insn->code) != BPF_RET) { | |
446 // We encountered an instruction that doesn't change code flow. Try to | |
447 // pick the next "first_block" from "last_insn->next", if possible. | |
448 TargetsToBlocks::const_iterator iter; | |
449 iter = blocks.find(last_insn->next); | |
450 if (!--unordered_blocks[iter->second]) { | |
451 first_block = iter->second; | |
452 continue; | |
453 } else { | |
454 // Our basic block is supposed to be followed by "last_insn->next", | |
455 // but dependencies prevent this from happening. Insert a BPF_JA | |
456 // instruction to correct the code flow. | |
457 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next); | |
458 first_block->instructions.push_back(ja); | |
459 last_insn->next = ja; | |
460 } | |
461 } | |
462 if (heads.empty()) { | |
463 if (unordered_blocks.size() != basic_blocks->size()) { | |
464 SANDBOX_DIE("Internal compiler error; cyclic graph detected"); | |
465 } | |
466 return; | |
467 } | |
468 // Proceed by picking an arbitrary node from the set of basic blocks that | |
469 // do not have any incoming branches. | |
470 first_block = *heads.begin(); | |
471 heads.erase(heads.begin()); | |
472 } | |
473 } | |
474 | |
475 void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks, | |
476 const TargetsToBlocks& targets_to_blocks) { | |
477 // While we previously used pointers in jt_ptr and jf_ptr to link jump | |
478 // instructions to their targets, we now convert these jumps to relative | |
479 // jumps that are suitable for loading the BPF program into the kernel. | |
480 int offset = 0; | |
481 | |
482 // Since we just completed a toposort, all jump targets are guaranteed to | |
483 // go forward. This means, iterating over the basic blocks in reverse makes | |
484 // it trivial to compute the correct offsets. | |
485 BasicBlock* bb = NULL; | |
486 BasicBlock* last_bb = NULL; | |
487 for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin(); | |
488 iter != basic_blocks->rend(); | |
489 ++iter) { | |
490 last_bb = bb; | |
491 bb = *iter; | |
492 Instruction* insn = bb->instructions.back(); | |
493 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
494 // Basic block ended in a jump instruction. We can now compute the | |
495 // appropriate offsets. | |
496 if (BPF_OP(insn->code) == BPF_JA) { | |
497 // "Always" jumps use the 32bit "k" field for the offset, instead | |
498 // of the 8bit "jt" and "jf" fields. | |
499 int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset; | |
500 insn->k = jmp; | |
501 insn->jt = insn->jf = 0; | |
502 } else { | |
503 // The offset computations for conditional jumps are just the same | |
504 // as for "always" jumps. | |
505 int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset; | |
506 int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset; | |
507 | |
508 // There is an added complication, because conditional relative jumps | |
509 // can only jump at most 255 instructions forward. If we have to jump | |
510 // further, insert an extra "always" jump. | |
511 Instructions::size_type jmp = bb->instructions.size(); | |
512 if (jt > 255 || (jt == 255 && jf > 255)) { | |
513 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr); | |
514 bb->instructions.push_back(ja); | |
515 ja->k = jt; | |
516 ja->jt = ja->jf = 0; | |
517 | |
518 // The newly inserted "always" jump, of course, requires us to adjust | |
519 // the jump targets in the original conditional jump. | |
520 jt = 0; | |
521 ++jf; | |
522 } | |
523 if (jf > 255) { | |
524 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr); | |
525 bb->instructions.insert(bb->instructions.begin() + jmp, ja); | |
526 ja->k = jf; | |
527 ja->jt = ja->jf = 0; | |
528 | |
529 // Again, we have to adjust the jump targets in the original | |
530 // conditional jump. | |
531 ++jt; | |
532 jf = 0; | |
533 } | |
534 | |
535 // Now we can finally set the relative jump targets in the conditional | |
536 // jump instruction. Afterwards, we must no longer access the jt_ptr | |
537 // and jf_ptr fields. | |
538 insn->jt = jt; | |
539 insn->jf = jf; | |
540 } | |
541 } else if (BPF_CLASS(insn->code) != BPF_RET && | |
542 targets_to_blocks.find(insn->next)->second != last_bb) { | |
543 SANDBOX_DIE("Internal compiler error; invalid basic block encountered"); | |
544 } | |
545 | |
546 // Proceed to next basic block. | |
547 offset += bb->instructions.size(); | |
548 bb->offset = offset; | |
549 } | |
550 return; | |
551 } | |
552 | |
553 void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks, | |
554 Program* program) { | |
555 // Our basic blocks have been sorted and relative jump offsets have been | |
556 // computed. The last remaining step is for all the instructions in our | |
557 // basic blocks to be concatenated into a BPF program. | |
558 program->clear(); | |
559 for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin(); | |
560 bb_iter != basic_blocks.end(); | |
561 ++bb_iter) { | |
562 const BasicBlock& bb = **bb_iter; | |
563 for (Instructions::const_iterator insn_iter = bb.instructions.begin(); | |
564 insn_iter != bb.instructions.end(); | |
565 ++insn_iter) { | |
566 const Instruction& insn = **insn_iter; | |
567 program->push_back( | |
568 (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k}); | |
569 } | |
570 } | |
571 return; | |
572 } | |
573 | |
574 void CodeGen::Compile(Instruction* instructions, Program* program) { | |
575 if (compiled_) { | |
576 SANDBOX_DIE( | |
577 "Cannot call Compile() multiple times. Create a new code " | |
578 "generator instead"); | |
579 } | |
580 compiled_ = true; | |
581 | |
582 BranchTargets branch_targets; | |
583 FindBranchTargets(*instructions, &branch_targets); | |
584 TargetsToBlocks all_blocks; | |
585 BasicBlock* first_block = | |
586 CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks); | |
587 MergeTails(&all_blocks); | |
588 BasicBlocks basic_blocks; | |
589 TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks); | |
590 ComputeRelativeJumps(&basic_blocks, all_blocks); | |
591 ConcatenateBasicBlocks(basic_blocks, program); | |
592 return; | |
593 } | 151 } |
594 | 152 |
595 } // namespace sandbox | 153 } // namespace sandbox |