| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "sandbox/linux/seccomp-bpf/codegen.h" | 5 #include "sandbox/linux/seccomp-bpf/codegen.h" |
| 6 | 6 |
| 7 #include <linux/filter.h> | 7 #include <linux/filter.h> |
| 8 | 8 |
| 9 #include <set> | 9 #include <limits> |
| 10 #include <utility> |
| 10 | 11 |
| 11 #include "base/logging.h" | 12 #include "base/logging.h" |
| 12 #include "sandbox/linux/seccomp-bpf/basicblock.h" | 13 |
| 13 #include "sandbox/linux/seccomp-bpf/die.h" | 14 // This CodeGen implementation strives for simplicity while still |
| 14 #include "sandbox/linux/seccomp-bpf/instruction.h" | 15 // generating acceptable BPF programs under typical usage patterns |
| 16 // (e.g., by PolicyCompiler). |
| 17 // |
| 18 // The key to its simplicity is that BPF programs only support forward |
| 19 // jumps/branches, which allows constraining the DAG construction API |
| 20 // to make instruction nodes immutable. Immutable nodes admit a |
| 21 // simple greedy approach of emitting new instructions as needed and |
| 22 // then reusing existing ones that have already been emitted. This |
| 23 // cleanly avoids any need to compute basic blocks or apply |
| 24 // topological sorting because the API effectively sorts instructions |
| 25 // for us (e.g., before MakeInstruction() can be called to emit a |
| 26 // branch instruction, it must have already been called for each |
| 27 // branch path). |
| 28 // |
| 29 // This greedy algorithm is not without (theoretical) weaknesses, though: |
| 30 // |
| 31 // 1. In the general case, we don't eliminate dead code. If needed, |
| 32 // we could trace back through the program in Compile() and elide |
| 33 // any unneeded instructions, but in practice we only emit live |
| 34 // instructions anyway. |
| 35 // |
| 36 // 2. By not dividing instructions into basic blocks and sorting, we |
| 37 // lose an opportunity to move non-branch/non-return instructions |
| 38 // adjacent to their successor instructions, which means we might |
| 39 // need to emit additional jumps. But in practice, they'll |
| 40 // already be nearby as long as callers don't go out of their way |
| 41 // to interleave MakeInstruction() calls for unrelated code |
| 42 // sequences. |
| 15 | 43 |
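To make the construction order concrete, here is a minimal usage sketch of the API described in the comment above. It assumes the header defaults the jt/jf parameters to kNullNode, and it borrows SECCOMP_RET_*, seccomp_data, and __NR_getpid from <linux/seccomp.h> and <sys/syscall.h>; the policy itself is hypothetical and not taken from PolicyCompiler.

    // Both branch targets must already exist before the branch that uses them.
    using sandbox::CodeGen;
    CodeGen gen;
    CodeGen::Node allow = gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_ALLOW);
    CodeGen::Node kill  = gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_KILL);
    // Conditional branch: jt/jf reference the already-emitted returns.
    CodeGen::Node check = gen.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K,
                                              __NR_getpid, allow, kill);
    // Non-branch instructions name their successor via jt.
    CodeGen::Node head = gen.MakeInstruction(BPF_LD + BPF_W + BPF_ABS,
                                             offsetof(struct seccomp_data, nr),
                                             check);
    CodeGen::Program program;
    gen.Compile(head, &program);  // Emits a forward-only BPF program.

Because each node is immutable once emitted, no patching or reordering is ever needed; Compile() simply reads the emitted instructions back out.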
| 16 namespace sandbox { | 44 namespace sandbox { |
| 17 | 45 |
| 18 // Unfortunately this needs to be defined out-of-line because inline | 46 // kBranchRange is the maximum value that can be stored in |
| 19 // initializing a static member to "nullptr" requires "constexpr", | 47 // sock_filter's 8-bit jt and jf fields. |
| 20 // which is currently banned by the Chromium style guide. | 48 const size_t kBranchRange = std::numeric_limits<uint8_t>::max(); |
| 21 const CodeGen::Node CodeGen::kNullNode = nullptr; | |
| 22 | 49 |
| 23 CodeGen::CodeGen() : compiled_(false) {} | 50 const CodeGen::Node CodeGen::kNullNode; |
| 51 |
| 52 CodeGen::CodeGen() : program_(), memos_() { |
| 53 } |
| 24 | 54 |
| 25 CodeGen::~CodeGen() { | 55 CodeGen::~CodeGen() { |
| 26 for (Instructions::iterator iter = instructions_.begin(); | 56 } |
| 27 iter != instructions_.end(); | 57 |
| 28 ++iter) { | 58 void CodeGen::Compile(CodeGen::Node head, Program* out) { |
| 29 delete *iter; | 59 DCHECK(out); |
| 30 } | 60 out->assign(program_.rbegin() + Offset(head), program_.rend()); |
| 31 for (BasicBlocks::iterator iter = basic_blocks_.begin(); | |
| 32 iter != basic_blocks_.end(); | |
| 33 ++iter) { | |
| 34 delete *iter; | |
| 35 } | |
| 36 } | 61 } |
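The reversed assign works because program_ stores instructions in emission order, which is deepest-target-first; reading it backwards starting at |head| therefore yields a forward-running program. A self-contained toy model of that arithmetic, with plain ints standing in for sock_filter entries:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
      std::vector<int> emitted = {30, 20, 10};        // 30 emitted first, 10 (head) last.
      std::size_t head = 2;                           // Node == index into |emitted|.
      std::size_t off = (emitted.size() - 1) - head;  // Same arithmetic as Offset().
      std::vector<int> program(emitted.rbegin() + off, emitted.rend());
      assert((program == std::vector<int>{10, 20, 30}));  // Head first, deepest target last.
      return 0;
    }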
| 37 | 62 |
| 38 CodeGen::Node CodeGen::MakeInstruction(uint16_t code, | 63 CodeGen::Node CodeGen::MakeInstruction(uint16_t code, |
| 39 uint32_t k, | 64 uint32_t k, |
| 40 Node jt, | 65 Node jt, |
| 41 Node jf) { | 66 Node jf) { |
| 42 Node insn; | 67 // To avoid generating redundant code sequences, we memoize the |
| 43 if (BPF_CLASS(code) == BPF_JMP) { | 68 // results from AppendInstruction(). |
| 44 CHECK_NE(kNullNode, jt); | 69 auto res = memos_.insert(std::make_pair(MemoKey(code, k, jt, jf), kNullNode)); |
| 45 if (BPF_OP(code) == BPF_JA) { | 70 CodeGen::Node* node = &res.first->second; |
| 46 CHECK_EQ(kNullNode, jf); | 71 if (res.second) { // Newly inserted memo entry. |
| 47 } else { | 72 *node = AppendInstruction(code, k, jt, jf); |
| 48 CHECK_NE(kNullNode, jf); | |
| 49 } | |
| 50 insn = new Instruction(code, k, jt, jf); | |
| 51 } else { | |
| 52 if (BPF_CLASS(code) == BPF_RET) { | |
| 53 CHECK_EQ(kNullNode, jt); | |
| 54 } else { | |
| 55 CHECK_NE(kNullNode, jt); | |
| 56 } | |
| 57 CHECK_EQ(kNullNode, jf); | |
| 58 insn = new Instruction(code, k, jt); | |
| 59 } | 73 } |
| 60 instructions_.push_back(insn); | 74 return *node; |
| 61 return insn; | |
| 62 } | 75 } |
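A brief sketch of the memoization effect (same constants and default-argument assumption as the earlier sketch): two structurally identical requests yield the same node, so shared tails are emitted exactly once.

    sandbox::CodeGen gen;
    sandbox::CodeGen::Node a = gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_ALLOW);
    sandbox::CodeGen::Node b = gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_ALLOW);
    // a == b: the second call finds the existing memo entry, and
    // AppendInstruction() is not called again.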
| 63 | 76 |
| 64 void CodeGen::FindBranchTargets(const Instruction& instructions, | 77 CodeGen::Node CodeGen::AppendInstruction(uint16_t code, |
| 65 BranchTargets* branch_targets) { | 78 uint32_t k, |
| 66 // Follow all possible paths through the "instructions" graph and compute | 79 Node jt, |
| 67 // a list of branch targets. This will later be needed to compute the | 80 Node jf) { |
| 68 // boundaries of basic blocks. | 81 if (BPF_CLASS(code) == BPF_JMP) { |
| 69 // We maintain a set of all instructions that we have previously seen. This | 82 CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed"; |
| 70 // set ultimately converges on all instructions in the program. | 83 |
| 71 std::set<const Instruction*> seen_instructions; | 84 // We need to check |jt| twice because it might get pushed |
| 72 Instructions stack; | 85 // out-of-range by appending a jump for |jf|. |
| 73 for (const Instruction* insn = &instructions; insn;) { | 86 jt = WithinRange(jt, kBranchRange); |
| 74 seen_instructions.insert(insn); | 87 jf = WithinRange(jf, kBranchRange); |
| 75 if (BPF_CLASS(insn->code) == BPF_JMP) { | 88 jt = WithinRange(jt, kBranchRange); |
| 76 // Found a jump. Increase count of incoming edges for each of the jump | 89 return Append(code, k, Offset(jt), Offset(jf)); |
| 77 // targets. | |
| 78 ++(*branch_targets)[insn->jt_ptr]; | |
| 79 if (BPF_OP(insn->code) != BPF_JA) { | |
| 80 ++(*branch_targets)[insn->jf_ptr]; | |
| 81 stack.push_back(const_cast<Instruction*>(insn)); | |
| 82 } | |
| 83 // Start a recursive decent for depth-first traversal. | |
| 84 if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) { | |
| 85 // We haven't seen the "true" branch yet. Traverse it now. We have | |
| 86 // already remembered the "false" branch on the stack and will | |
| 87 // traverse it later. | |
| 88 insn = insn->jt_ptr; | |
| 89 continue; | |
| 90 } else { | |
| 91 // Now try traversing the "false" branch. | |
| 92 insn = NULL; | |
| 93 } | |
| 94 } else { | |
| 95 // This is a non-jump instruction, just continue to the next instruction | |
| 96 // (if any). It's OK if "insn" becomes NULL when reaching a return | |
| 97 // instruction. | |
| 98 if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) { | |
| 99 SANDBOX_DIE( | |
| 100 "Internal compiler error; return instruction must be at " | |
| 101 "the end of the BPF program"); | |
| 102 } | |
| 103 if (seen_instructions.find(insn->next) == seen_instructions.end()) { | |
| 104 insn = insn->next; | |
| 105 } else { | |
| 106 // We have seen this instruction before. That could happen if it is | |
| 107 // a branch target. No need to continue processing. | |
| 108 insn = NULL; | |
| 109 } | |
| 110 } | |
| 111 while (!insn && !stack.empty()) { | |
| 112 // We are done processing all the way to a leaf node, backtrack up the | |
| 113 // stack to any branches that we haven't processed yet. By definition, | |
| 114 // this has to be a "false" branch, as we always process the "true" | |
| 115 // branches right away. | |
| 116 insn = stack.back(); | |
| 117 stack.pop_back(); | |
| 118 if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) { | |
| 119 // We haven't seen the "false" branch yet. So, that's where we'll | |
| 120 // go now. | |
| 121 insn = insn->jf_ptr; | |
| 122 } else { | |
| 123 // We have seen both the "true" and the "false" branch, continue | |
| 124 // up the stack. | |
| 125 if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) { | |
| 126 SANDBOX_DIE( | |
| 127 "Internal compiler error; cannot find all " | |
| 128 "branch targets"); | |
| 129 } | |
| 130 insn = NULL; | |
| 131 } | |
| 132 } | |
| 133 } | 90 } |
| 134 return; | 91 |
| 92 CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf"; |
| 93 if (BPF_CLASS(code) == BPF_RET) { |
| 94 CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt"; |
| 95 } else { |
| 96 // For non-branch/non-return instructions, execution always |
| 97 // proceeds to the next instruction, so we need to arrange for |
| 98 // that to be |jt|. |
| 99 jt = WithinRange(jt, 0); |
| 100 } |
| 101 return Append(code, k, 0, 0); |
| 135 } | 102 } |
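Worked arithmetic behind the double |jt| check, with hypothetical offsets: appending a BPF_JA for |jf| grows the program by one instruction, which increases every existing node's offset by one and can push a previously in-range |jt| just past the 8-bit limit.

    #include <cassert>
    #include <cstddef>

    int main() {
      const std::size_t kBranchRange = 255;  // Limit of the 8-bit jt/jf fields.
      std::size_t offset_jt = 255;           // In range before |jf| is handled.
      std::size_t offset_jf = 300;           // Out of range: a BPF_JA is appended.
      if (offset_jf > kBranchRange) {
        ++offset_jt;  // The appended jump shifts all existing offsets by one.
      }
      assert(offset_jt > kBranchRange);  // Hence the second WithinRange(jt, ...) call.
      return 0;
    }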
| 136 | 103 |
| 137 BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) { | 104 CodeGen::Node CodeGen::WithinRange(Node target, size_t range) { |
| 138 // Iterate over all the instructions between "head" and "tail" and | 105 if (Offset(target) > range) { |
| 139 // insert them into a new basic block. | 106 // TODO(mdempsky): If |range > 0|, we might be able to reuse an |
| 140 BasicBlock* bb = new BasicBlock; | 107 // existing instruction within that range. |
| 141 for (;; head = head->next) { | 108 |
| 142 bb->instructions.push_back(head); | 109 // TODO(mdempsky): If |target| is a branch or return, it might be |
| 143 if (head == tail) { | 110 // possible to duplicate that instruction rather than jump to it. |
| 144 break; | 111 |
| 145 } | 112 // Fall back to emitting a jump instruction. |
| 146 if (BPF_CLASS(head->code) == BPF_JMP) { | 113 target = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0); |
| 147 SANDBOX_DIE("Found a jump inside of a basic block"); | |
| 148 } | |
| 149 } | 114 } |
| 150 basic_blocks_.push_back(bb); | 115 |
| 151 return bb; | 116 CHECK_LE(Offset(target), range) << "ICE: Failed to bring target within range"; |
| 117 return target; |
| 152 } | 118 } |
| 153 | 119 |
| 154 void CodeGen::AddBasicBlock(Instruction* head, | 120 CodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) { |
| 155 Instruction* tail, | 121 if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) { |
| 156 const BranchTargets& branch_targets, | 122 CHECK_LE(jt, kBranchRange); |
| 157 TargetsToBlocks* basic_blocks, | 123 CHECK_LE(jf, kBranchRange); |
| 158 BasicBlock** firstBlock) { | 124 } else { |
| 159 // Add a new basic block to "basic_blocks". Also set "firstBlock", if it | 125 CHECK_EQ(0U, jt); |
| 160 // has not been set before. | 126 CHECK_EQ(0U, jf); |
| 161 BranchTargets::const_iterator iter = branch_targets.find(head); | |
| 162 if ((iter == branch_targets.end()) != !*firstBlock || | |
| 163 !*firstBlock != basic_blocks->empty()) { | |
| 164 SANDBOX_DIE( | |
| 165 "Only the very first basic block should have no " | |
| 166 "incoming jumps"); | |
| 167 } | 127 } |
| 168 BasicBlock* bb = MakeBasicBlock(head, tail); | 128 |
| 169 if (!*firstBlock) { | 129 CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS)); |
| 170 *firstBlock = bb; | 130 program_.push_back(sock_filter{code, jt, jf, k}); |
| 171 } | 131 return program_.size() - 1; |
| 172 (*basic_blocks)[head] = bb; | |
| 173 return; | |
| 174 } | 132 } |
| 175 | 133 |
| 176 BasicBlock* CodeGen::CutGraphIntoBasicBlocks( | 134 size_t CodeGen::Offset(Node target) const { |
| 177 Instruction* instructions, | 135 CHECK_LT(target, program_.size()) << "Bogus offset target node"; |
| 178 const BranchTargets& branch_targets, | 136 return (program_.size() - 1) - target; |
| 179 TargetsToBlocks* basic_blocks) { | |
| 180 // Textbook implementation of a basic block generator. All basic blocks | |
| 181 // start with a branch target and end with either a return statement or | |
| 182 // a jump (or are followed by an instruction that forms the beginning of a | |
| 183 // new block). Both conditional and "always" jumps are supported. | |
| 184 BasicBlock* first_block = NULL; | |
| 185 std::set<const Instruction*> seen_instructions; | |
| 186 Instructions stack; | |
| 187 Instruction* tail = NULL; | |
| 188 Instruction* head = instructions; | |
| 189 for (Instruction* insn = head; insn;) { | |
| 190 if (seen_instructions.find(insn) != seen_instructions.end()) { | |
| 191 // We somehow went in a circle. This should never be possible. Not even | |
| 192 // cyclic graphs are supposed to confuse us this much. | |
| 193 SANDBOX_DIE("Internal compiler error; cannot compute basic blocks"); | |
| 194 } | |
| 195 seen_instructions.insert(insn); | |
| 196 if (tail && branch_targets.find(insn) != branch_targets.end()) { | |
| 197 // We reached a branch target. Start a new basic block (this means, | |
| 198 // flushing the previous basic block first). | |
| 199 AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block); | |
| 200 head = insn; | |
| 201 } | |
| 202 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
| 203 // We reached a jump instruction, this completes our current basic | |
| 204 // block. Flush it and continue by traversing both the true and the | |
| 205 // false branch of the jump. We need to maintain a stack to do so. | |
| 206 AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block); | |
| 207 if (BPF_OP(insn->code) != BPF_JA) { | |
| 208 stack.push_back(insn->jf_ptr); | |
| 209 } | |
| 210 insn = insn->jt_ptr; | |
| 211 | |
| 212 // If we are jumping to an instruction that we have previously | |
| 213 // processed, we are done with this branch. Continue by backtracking | |
| 214 // up the stack. | |
| 215 while (seen_instructions.find(insn) != seen_instructions.end()) { | |
| 216 backtracking: | |
| 217 if (stack.empty()) { | |
| 218 // We successfully traversed all reachable instructions. | |
| 219 return first_block; | |
| 220 } else { | |
| 221 // Going up the stack. | |
| 222 insn = stack.back(); | |
| 223 stack.pop_back(); | |
| 224 } | |
| 225 } | |
| 226 // Starting a new basic block. | |
| 227 tail = NULL; | |
| 228 head = insn; | |
| 229 } else { | |
| 230 // We found a non-jumping instruction, append it to current basic | |
| 231 // block. | |
| 232 tail = insn; | |
| 233 insn = insn->next; | |
| 234 if (!insn) { | |
| 235 // We reached a return statement, flush the current basic block and | |
| 236 // backtrack up the stack. | |
| 237 AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block); | |
| 238 goto backtracking; | |
| 239 } | |
| 240 } | |
| 241 } | |
| 242 return first_block; | |
| 243 } | 137 } |
| 244 | 138 |
| 245 // We define a comparator that inspects the sequence of instructions in our | 139 // TODO(mdempsky): Move into a general base::Tuple helper library. |
| 246 // basic block and any blocks referenced by this block. This function can be | 140 bool CodeGen::MemoKeyLess::operator()(const MemoKey& lhs, |
| 247 // used in a "less" comparator for the purpose of storing pointers to basic | 141 const MemoKey& rhs) const { |
| 248 // blocks in STL containers; this gives an easy option to use STL to find | 142 if (lhs.a != rhs.a) |
| 249 // shared tail sequences of basic blocks. | 143 return lhs.a < rhs.a; |
| 250 static int PointerCompare(const BasicBlock* block1, | 144 if (lhs.b != rhs.b) |
| 251 const BasicBlock* block2, | 145 return lhs.b < rhs.b; |
| 252 const TargetsToBlocks& blocks) { | 146 if (lhs.c != rhs.c) |
| 253 // Return <0, 0, or >0 depending on the ordering of "block1" and "block2". | 147 return lhs.c < rhs.c; |
| 254 // If we are looking at the exact same block, this is trivial and we don't | 148 if (lhs.d != rhs.d) |
| 255 // need to do a full comparison. | 149 return lhs.d < rhs.d; |
| 256 if (block1 == block2) { | 150 return false; |
| 257 return 0; | |
| 258 } | |
| 259 | |
| 260 // We compare the sequence of instructions in both basic blocks. | |
| 261 const Instructions& insns1 = block1->instructions; | |
| 262 const Instructions& insns2 = block2->instructions; | |
| 263 // Basic blocks should never be empty. | |
| 264 CHECK(!insns1.empty()); | |
| 265 CHECK(!insns2.empty()); | |
| 266 | |
| 267 Instructions::const_iterator iter1 = insns1.begin(); | |
| 268 Instructions::const_iterator iter2 = insns2.begin(); | |
| 269 for (;; ++iter1, ++iter2) { | |
| 270 // If we have reached the end of the sequence of instructions in one or | |
| 271 // both basic blocks, we know the relative ordering between the two blocks | |
| 272 // and can return. | |
| 273 if (iter1 == insns1.end() || iter2 == insns2.end()) { | |
| 274 if (iter1 != insns1.end()) { | |
| 275 return 1; | |
| 276 } | |
| 277 if (iter2 != insns2.end()) { | |
| 278 return -1; | |
| 279 } | |
| 280 | |
| 281 // If the two blocks are the same length (and have elementwise-equal code | |
| 282 // and k fields) and their last instructions are neither a JMP nor a RET | |
| 283 // (which is the only way we can reach this point), then we must compare | |
| 284 // their successors. | |
| 285 Instruction* const insns1_last = insns1.back(); | |
| 286 Instruction* const insns2_last = insns2.back(); | |
| 287 CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP && | |
| 288 BPF_CLASS(insns1_last->code) != BPF_RET); | |
| 289 | |
| 290 // Non jumping instructions will always have a valid next instruction. | |
| 291 CHECK(insns1_last->next); | |
| 292 CHECK(insns2_last->next); | |
| 293 return PointerCompare(blocks.find(insns1_last->next)->second, | |
| 294 blocks.find(insns2_last->next)->second, | |
| 295 blocks); | |
| 296 } | |
| 297 | |
| 298 // Compare the individual fields for both instructions. | |
| 299 const Instruction& insn1 = **iter1; | |
| 300 const Instruction& insn2 = **iter2; | |
| 301 if (insn1.code != insn2.code) { | |
| 302 return insn1.code - insn2.code; | |
| 303 } | |
| 304 if (insn1.k != insn2.k) { | |
| 305 return insn1.k - insn2.k; | |
| 306 } | |
| 307 | |
| 308 // Sanity check: If we're looking at a JMP or RET instruction, by definition | |
| 309 // it should be the last instruction of the basic block. | |
| 310 if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) { | |
| 311 CHECK_EQ(insns1.back(), &insn1); | |
| 312 CHECK_EQ(insns2.back(), &insn2); | |
| 313 } | |
| 314 | |
| 315 // RET instructions terminate execution, and only JMP instructions use the | |
| 316 // jt_ptr and jf_ptr fields. Anything else can continue to the next | |
| 317 // instruction in the basic block. | |
| 318 if (BPF_CLASS(insn1.code) == BPF_RET) { | |
| 319 return 0; | |
| 320 } else if (BPF_CLASS(insn1.code) != BPF_JMP) { | |
| 321 continue; | |
| 322 } | |
| 323 | |
| 324 // Recursively compare the "true" and "false" branches. | |
| 325 // A well-formed BPF program can't have any cycles, so we know | |
| 326 // that our recursive algorithm will ultimately terminate. | |
| 327 // In the unlikely event that the programmer made a mistake and | |
| 328 // went out of the way to give us a cyclic program, we will crash | |
| 329 // with a stack overflow. We are OK with that. | |
| 330 if (BPF_OP(insn1.code) != BPF_JA) { | |
| 331 int c = PointerCompare(blocks.find(insn1.jf_ptr)->second, | |
| 332 blocks.find(insn2.jf_ptr)->second, | |
| 333 blocks); | |
| 334 if (c != 0) { | |
| 335 return c; | |
| 336 } | |
| 337 } | |
| 338 return PointerCompare(blocks.find(insn1.jt_ptr)->second, | |
| 339 blocks.find(insn2.jt_ptr)->second, | |
| 340 blocks); | |
| 341 } | |
| 342 } | |
| 343 | |
| 344 void CodeGen::MergeTails(TargetsToBlocks* blocks) { | |
| 345 // We enter all of our basic blocks into a set using the BasicBlock::Less() | |
| 346 // comparator. This naturally results in blocks with identical tails of | |
| 347 // instructions to map to the same entry in the set. Whenever we discover | |
| 348 // that a particular chain of instructions is already in the set, we merge | |
| 349 // the basic blocks and update the pointer in the "blocks" map. | |
| 350 // Returns the number of unique basic blocks. | |
| 351 // N.B. We don't merge instructions on a granularity that is finer than | |
| 352 // a basic block. In practice, this is sufficiently rare that we don't | |
| 353 // incur a big cost. | |
| 354 // Similarly, we currently don't merge anything other than tails. In | |
| 355 // the future, we might decide to revisit this decision and attempt to | |
| 356 // merge arbitrary sub-sequences of instructions. | |
| 357 BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare); | |
| 358 typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set; | |
| 359 Set seen_basic_blocks(less); | |
| 360 for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end(); | |
| 361 ++iter) { | |
| 362 BasicBlock* bb = iter->second; | |
| 363 Set::const_iterator entry = seen_basic_blocks.find(bb); | |
| 364 if (entry == seen_basic_blocks.end()) { | |
| 365 // This is the first time we see this particular sequence of | |
| 366 // instructions. Enter the basic block into the set of known | |
| 367 // basic blocks. | |
| 368 seen_basic_blocks.insert(bb); | |
| 369 } else { | |
| 370 // We have previously seen another basic block that defines the same | |
| 371 // sequence of instructions. Merge the two blocks and update the | |
| 372 // pointer in the "blocks" map. | |
| 373 iter->second = *entry; | |
| 374 } | |
| 375 } | |
| 376 } | |
| 377 | |
| 378 void CodeGen::ComputeIncomingBranches(BasicBlock* block, | |
| 379 const TargetsToBlocks& targets_to_blocks, | |
| 380 IncomingBranches* incoming_branches) { | |
| 381 // We increment the number of incoming branches each time we encounter a | |
| 382 // basic block. But we only traverse recursively the very first time we | |
| 383 // encounter a new block. This is necessary to make topological sorting | |
| 384 // work correctly. | |
| 385 if (++(*incoming_branches)[block] == 1) { | |
| 386 Instruction* last_insn = block->instructions.back(); | |
| 387 if (BPF_CLASS(last_insn->code) == BPF_JMP) { | |
| 388 ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second, | |
| 389 targets_to_blocks, | |
| 390 incoming_branches); | |
| 391 if (BPF_OP(last_insn->code) != BPF_JA) { | |
| 392 ComputeIncomingBranches( | |
| 393 targets_to_blocks.find(last_insn->jf_ptr)->second, | |
| 394 targets_to_blocks, | |
| 395 incoming_branches); | |
| 396 } | |
| 397 } else if (BPF_CLASS(last_insn->code) != BPF_RET) { | |
| 398 ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second, | |
| 399 targets_to_blocks, | |
| 400 incoming_branches); | |
| 401 } | |
| 402 } | |
| 403 } | |
| 404 | |
| 405 void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block, | |
| 406 const TargetsToBlocks& blocks, | |
| 407 BasicBlocks* basic_blocks) { | |
| 408 // Textbook implementation of a toposort. We keep looking for basic blocks | |
| 409 // that don't have any incoming branches (initially, this is just the | |
| 410 // "first_block") and add them to the topologically sorted list of | |
| 411 // "basic_blocks". As we do so, we remove outgoing branches. This potentially | |
| 412 // ends up making our descendants eligible for the sorted list. The | |
| 413 // sorting algorithm terminates when there are no more basic blocks that have | |
| 414 // no incoming branches. If we didn't move all blocks from the set of | |
| 415 // "unordered_blocks" to the sorted list of "basic_blocks", there must have | |
| 416 // been a cyclic dependency. This should never happen in a BPF program, as | |
| 417 // well-formed BPF programs only ever have forward branches. | |
| 418 IncomingBranches unordered_blocks; | |
| 419 ComputeIncomingBranches(first_block, blocks, &unordered_blocks); | |
| 420 | |
| 421 std::set<BasicBlock*> heads; | |
| 422 for (;;) { | |
| 423 // Move block from "unordered_blocks" to "basic_blocks". | |
| 424 basic_blocks->push_back(first_block); | |
| 425 | |
| 426 // Inspect last instruction in the basic block. This is typically either a | |
| 427 // jump or a return statement. But it could also be a "normal" instruction | |
| 428 // that is followed by a jump target. | |
| 429 Instruction* last_insn = first_block->instructions.back(); | |
| 430 if (BPF_CLASS(last_insn->code) == BPF_JMP) { | |
| 431 // Remove outgoing branches. This might end up moving our descendants | |
| 432 // into set of "head" nodes that no longer have any incoming branches. | |
| 433 TargetsToBlocks::const_iterator iter; | |
| 434 if (BPF_OP(last_insn->code) != BPF_JA) { | |
| 435 iter = blocks.find(last_insn->jf_ptr); | |
| 436 if (!--unordered_blocks[iter->second]) { | |
| 437 heads.insert(iter->second); | |
| 438 } | |
| 439 } | |
| 440 iter = blocks.find(last_insn->jt_ptr); | |
| 441 if (!--unordered_blocks[iter->second]) { | |
| 442 first_block = iter->second; | |
| 443 continue; | |
| 444 } | |
| 445 } else if (BPF_CLASS(last_insn->code) != BPF_RET) { | |
| 446 // We encountered an instruction that doesn't change code flow. Try to | |
| 447 // pick the next "first_block" from "last_insn->next", if possible. | |
| 448 TargetsToBlocks::const_iterator iter; | |
| 449 iter = blocks.find(last_insn->next); | |
| 450 if (!--unordered_blocks[iter->second]) { | |
| 451 first_block = iter->second; | |
| 452 continue; | |
| 453 } else { | |
| 454 // Our basic block is supposed to be followed by "last_insn->next", | |
| 455 // but dependencies prevent this from happening. Insert a BPF_JA | |
| 456 // instruction to correct the code flow. | |
| 457 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next); | |
| 458 first_block->instructions.push_back(ja); | |
| 459 last_insn->next = ja; | |
| 460 } | |
| 461 } | |
| 462 if (heads.empty()) { | |
| 463 if (unordered_blocks.size() != basic_blocks->size()) { | |
| 464 SANDBOX_DIE("Internal compiler error; cyclic graph detected"); | |
| 465 } | |
| 466 return; | |
| 467 } | |
| 468 // Proceed by picking an arbitrary node from the set of basic blocks that | |
| 469 // do not have any incoming branches. | |
| 470 first_block = *heads.begin(); | |
| 471 heads.erase(heads.begin()); | |
| 472 } | |
| 473 } | |
| 474 | |
| 475 void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks, | |
| 476 const TargetsToBlocks& targets_to_blocks) { | |
| 477 // While we previously used pointers in jt_ptr and jf_ptr to link jump | |
| 478 // instructions to their targets, we now convert these jumps to relative | |
| 479 // jumps that are suitable for loading the BPF program into the kernel. | |
| 480 int offset = 0; | |
| 481 | |
| 482 // Since we just completed a toposort, all jump targets are guaranteed to | |
| 483 // go forward. This means, iterating over the basic blocks in reverse makes | |
| 484 // it trivial to compute the correct offsets. | |
| 485 BasicBlock* bb = NULL; | |
| 486 BasicBlock* last_bb = NULL; | |
| 487 for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin(); | |
| 488 iter != basic_blocks->rend(); | |
| 489 ++iter) { | |
| 490 last_bb = bb; | |
| 491 bb = *iter; | |
| 492 Instruction* insn = bb->instructions.back(); | |
| 493 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
| 494 // Basic block ended in a jump instruction. We can now compute the | |
| 495 // appropriate offsets. | |
| 496 if (BPF_OP(insn->code) == BPF_JA) { | |
| 497 // "Always" jumps use the 32bit "k" field for the offset, instead | |
| 498 // of the 8bit "jt" and "jf" fields. | |
| 499 int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset; | |
| 500 insn->k = jmp; | |
| 501 insn->jt = insn->jf = 0; | |
| 502 } else { | |
| 503 // The offset computations for conditional jumps are just the same | |
| 504 // as for "always" jumps. | |
| 505 int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset; | |
| 506 int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset; | |
| 507 | |
| 508 // There is an added complication, because conditional relative jumps | |
| 509 // can only jump at most 255 instructions forward. If we have to jump | |
| 510 // further, insert an extra "always" jump. | |
| 511 Instructions::size_type jmp = bb->instructions.size(); | |
| 512 if (jt > 255 || (jt == 255 && jf > 255)) { | |
| 513 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr); | |
| 514 bb->instructions.push_back(ja); | |
| 515 ja->k = jt; | |
| 516 ja->jt = ja->jf = 0; | |
| 517 | |
| 518 // The newly inserted "always" jump, of course, requires us to adjust | |
| 519 // the jump targets in the original conditional jump. | |
| 520 jt = 0; | |
| 521 ++jf; | |
| 522 } | |
| 523 if (jf > 255) { | |
| 524 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr); | |
| 525 bb->instructions.insert(bb->instructions.begin() + jmp, ja); | |
| 526 ja->k = jf; | |
| 527 ja->jt = ja->jf = 0; | |
| 528 | |
| 529 // Again, we have to adjust the jump targets in the original | |
| 530 // conditional jump. | |
| 531 ++jt; | |
| 532 jf = 0; | |
| 533 } | |
| 534 | |
| 535 // Now we can finally set the relative jump targets in the conditional | |
| 536 // jump instruction. Afterwards, we must no longer access the jt_ptr | |
| 537 // and jf_ptr fields. | |
| 538 insn->jt = jt; | |
| 539 insn->jf = jf; | |
| 540 } | |
| 541 } else if (BPF_CLASS(insn->code) != BPF_RET && | |
| 542 targets_to_blocks.find(insn->next)->second != last_bb) { | |
| 543 SANDBOX_DIE("Internal compiler error; invalid basic block encountered"); | |
| 544 } | |
| 545 | |
| 546 // Proceed to next basic block. | |
| 547 offset += bb->instructions.size(); | |
| 548 bb->offset = offset; | |
| 549 } | |
| 550 return; | |
| 551 } | |
| 552 | |
| 553 void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks, | |
| 554 Program* program) { | |
| 555 // Our basic blocks have been sorted and relative jump offsets have been | |
| 556 // computed. The last remaining step is for all the instructions in our | |
| 557 // basic blocks to be concatenated into a BPF program. | |
| 558 program->clear(); | |
| 559 for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin(); | |
| 560 bb_iter != basic_blocks.end(); | |
| 561 ++bb_iter) { | |
| 562 const BasicBlock& bb = **bb_iter; | |
| 563 for (Instructions::const_iterator insn_iter = bb.instructions.begin(); | |
| 564 insn_iter != bb.instructions.end(); | |
| 565 ++insn_iter) { | |
| 566 const Instruction& insn = **insn_iter; | |
| 567 program->push_back( | |
| 568 (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k}); | |
| 569 } | |
| 570 } | |
| 571 return; | |
| 572 } | |
| 573 | |
| 574 void CodeGen::Compile(Instruction* instructions, Program* program) { | |
| 575 if (compiled_) { | |
| 576 SANDBOX_DIE( | |
| 577 "Cannot call Compile() multiple times. Create a new code " | |
| 578 "generator instead"); | |
| 579 } | |
| 580 compiled_ = true; | |
| 581 | |
| 582 BranchTargets branch_targets; | |
| 583 FindBranchTargets(*instructions, &branch_targets); | |
| 584 TargetsToBlocks all_blocks; | |
| 585 BasicBlock* first_block = | |
| 586 CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks); | |
| 587 MergeTails(&all_blocks); | |
| 588 BasicBlocks basic_blocks; | |
| 589 TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks); | |
| 590 ComputeRelativeJumps(&basic_blocks, all_blocks); | |
| 591 ConcatenateBasicBlocks(basic_blocks, program); | |
| 592 return; | |
| 593 } | 151 } |
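For comparison only (not part of the change), the same lexicographic ordering could be written with std::tie, assuming MemoKey and its a, b, c, d fields are accessible from the calling context:

    #include <tuple>

    bool MemoKeyLessViaTie(const sandbox::CodeGen::MemoKey& lhs,
                           const sandbox::CodeGen::MemoKey& rhs) {
      return std::tie(lhs.a, lhs.b, lhs.c, lhs.d) <
             std::tie(rhs.a, rhs.b, rhs.c, rhs.d);
    }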
| 594 | 152 |
| 595 } // namespace sandbox | 153 } // namespace sandbox |