// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "sandbox/linux/seccomp-bpf/codegen.h"

#include <stdio.h>

#include <set>

#include "base/logging.h"
#include "sandbox/linux/seccomp-bpf/basicblock.h"
#include "sandbox/linux/seccomp-bpf/die.h"
#include "sandbox/linux/seccomp-bpf/instruction.h"
#include "sandbox/linux/seccomp-bpf/linux_seccomp.h"
#include "sandbox/linux/seccomp-bpf/trap.h"

namespace sandbox {

CodeGen::CodeGen() : compiled_(false) {}

CodeGen::~CodeGen() {
  for (Instructions::iterator iter = instructions_.begin();
       iter != instructions_.end();
       ++iter) {
    delete *iter;
  }
  for (BasicBlocks::iterator iter = basic_blocks_.begin();
       iter != basic_blocks_.end();
       ++iter) {
    delete *iter;
  }
}

void CodeGen::PrintProgram(const Program& program) {
  for (Program::const_iterator iter = program.begin(); iter != program.end();
       ++iter) {
    int ip = (int)(iter - program.begin());
    fprintf(stderr, "%3d) ", ip);
    switch (BPF_CLASS(iter->code)) {
      case BPF_LD:
        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
          fprintf(stderr, "LOAD %d // ", (int)iter->k);
          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
            fprintf(stderr, "System call number\n");
          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
            fprintf(stderr, "Architecture\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
            fprintf(stderr, "Instruction pointer (LSB)\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer) +
                         4) {
            fprintf(stderr, "Instruction pointer (MSB)\n");
          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
                         0) {
            fprintf(
                stderr,
                "Argument %d (%cSB)\n",
                (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
                (iter->k - offsetof(struct arch_seccomp_data, args)) % 8
                    ? 'M'
                    : 'L');
          } else {
            fprintf(stderr, "???\n");
          }
        } else {
          fprintf(stderr, "LOAD ???\n");
        }
        break;
      case BPF_JMP:
        if (BPF_OP(iter->code) == BPF_JA) {
          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
        } else {
          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
                  BPF_OP(iter->code) == BPF_JSET ? "&" :
                  BPF_OP(iter->code) == BPF_JEQ ? "==" :
                  BPF_OP(iter->code) == BPF_JGE ? ">=" :
                  BPF_OP(iter->code) == BPF_JGT ? ">" : "???",
                  (int)iter->k,
                  ip + iter->jt + 1, ip + iter->jf + 1);
        }
        break;
      case BPF_RET:
        fprintf(stderr, "RET 0x%x // ", iter->k);
        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE) {
          fprintf(stderr, "Trace #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if (iter->k == SECCOMP_RET_ALLOW) {
          fprintf(stderr, "Allowed\n");
        } else {
          fprintf(stderr, "???\n");
        }
        break;
      case BPF_ALU:
        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
                            ? "A := -A\n" : "A := A %s 0x%x\n",
                BPF_OP(iter->code) == BPF_ADD ? "+" :
                BPF_OP(iter->code) == BPF_SUB ? "-" :
                BPF_OP(iter->code) == BPF_MUL ? "*" :
                BPF_OP(iter->code) == BPF_DIV ? "/" :
                BPF_OP(iter->code) == BPF_MOD ? "%" :
                BPF_OP(iter->code) == BPF_OR ? "|" :
                BPF_OP(iter->code) == BPF_XOR ? "^" :
                BPF_OP(iter->code) == BPF_AND ? "&" :
                BPF_OP(iter->code) == BPF_LSH ? "<<" :
                BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
                (int)iter->k);
        break;
      default:
        fprintf(stderr, "???\n");
        break;
    }
  }
  return;
}
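
// For reference, here is roughly what PrintProgram() emits for a hypothetical
// three-instruction filter that allows getpid and traps on everything else
// (0x27 is __NR_getpid on x86-64; the exact numbers depend on the target
// architecture and the policy being compiled):
//
//     0) LOAD 0 // System call number
//     1) if A == 0x27; then JMP 2 else JMP 3
//     2) RET 0x7fff0000 // Allowed
//     3) RET 0x30000 // Trap #0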

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* next) {
  // We can handle non-jumping instructions and "always" jumps. Both of
  // them are followed by exactly one "next" instruction.
  // We allow callers to defer specifying "next", but then they must call
  // "joinInstructions" later.
  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    SANDBOX_DIE(
        "Must provide both \"true\" and \"false\" branch "
        "for a BPF_JMP");
  }
  if (next && BPF_CLASS(code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  }
  if (BPF_CLASS(code) == BPF_JMP) {
    // "Always" jumps use the "true" branch target, only.
    Instruction* insn = new Instruction(code, 0, next, NULL);
    instructions_.push_back(insn);
    return insn;
  } else {
    // Non-jumping instructions do not use any of the branch targets.
    Instruction* insn = new Instruction(code, k, next);
    instructions_.push_back(insn);
    return insn;
  }
}

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* jt,
                                      Instruction* jf) {
  // We can handle all conditional jumps. They are followed by both a
  // "true" and a "false" branch.
  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
    SANDBOX_DIE("Expected a BPF_JMP instruction");
  }
  if (!jt || !jf) {
    SANDBOX_DIE("Branches must jump to a valid instruction");
  }
  Instruction* insn = new Instruction(code, k, jt, jf);
  instructions_.push_back(insn);
  return insn;
}
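
// A minimal usage sketch for the two MakeInstruction() overloads (assuming a
// caller with access to this API, e.g. the policy compiler layered on top of
// CodeGen). It builds the getpid filter from the PrintProgram() example
// above; note that instructions are created back to front, since every
// instruction must already know its successor(s):
//
//   CodeGen gen;
//   Instruction* trap =
//       gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_TRAP, NULL);
//   Instruction* allow =
//       gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_ALLOW, NULL);
//   Instruction* check = gen.MakeInstruction(
//       BPF_JMP + BPF_JEQ + BPF_K, __NR_getpid, allow, trap);
//   Instruction* head = gen.MakeInstruction(
//       BPF_LD + BPF_W + BPF_ABS, offsetof(struct arch_seccomp_data, nr),
//       check);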

void CodeGen::FindBranchTargets(const Instruction& instructions,
                                BranchTargets* branch_targets) {
  // Follow all possible paths through the "instructions" graph and compute
  // a list of branch targets. This will later be needed to compute the
  // boundaries of basic blocks.
  // We maintain a set of all instructions that we have previously seen. This
  // set ultimately converges on all instructions in the program.
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  for (const Instruction* insn = &instructions; insn;) {
    seen_instructions.insert(insn);
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Found a jump. Increase the count of incoming edges for each of the
      // jump targets.
      ++(*branch_targets)[insn->jt_ptr];
      if (BPF_OP(insn->code) != BPF_JA) {
        ++(*branch_targets)[insn->jf_ptr];
        stack.push_back(const_cast<Instruction*>(insn));
      }
      // Start a recursive descent for depth-first traversal.
      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
        // We haven't seen the "true" branch yet. Traverse it now. We have
        // already remembered the "false" branch on the stack and will
        // traverse it later.
        insn = insn->jt_ptr;
        continue;
      } else {
        // Now try traversing the "false" branch.
        insn = NULL;
      }
    } else {
      // This is a non-jump instruction, just continue to the next instruction
      // (if any). It's OK if "insn" becomes NULL when reaching a return
      // instruction; a NULL "next" pointer is valid exactly for return
      // instructions, and anything else indicates a malformed program.
      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
        SANDBOX_DIE(
            "Internal compiler error; return instruction must be at "
            "the end of the BPF program");
      }
      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
        insn = insn->next;
      } else {
        // We have seen this instruction before. That could happen if it is
        // a branch target. No need to continue processing.
        insn = NULL;
      }
    }
    while (!insn && !stack.empty()) {
      // We are done processing all the way to a leaf node, backtrack up the
      // stack to any branches that we haven't processed yet. By definition,
      // this has to be a "false" branch, as we always process the "true"
      // branches right away.
      insn = stack.back();
      stack.pop_back();
      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
        // We haven't seen the "false" branch yet. So, that's where we'll
        // go now.
        insn = insn->jf_ptr;
      } else {
        // We have seen both the "true" and the "false" branch, continue
        // up the stack.
        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
          SANDBOX_DIE(
              "Internal compiler error; cannot find all "
              "branch targets");
        }
        insn = NULL;
      }
    }
  }
  return;
}
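
// To make the data flow concrete (illustrative only; this helper is internal
// to the compiler): for the four-instruction getpid filter sketched above,
// the only BPF_JMP is "check", so FindBranchTargets() records exactly one
// incoming edge for "allow" (the "true" branch) and one for "trap" (the
// "false" branch). "head" is never jumped to and thus does not appear in
// "branch_targets" at all, which is what later identifies it as the start of
// the very first basic block.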

BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
  // Iterate over all the instructions between "head" and "tail" and
  // insert them into a new basic block.
  BasicBlock* bb = new BasicBlock;
  for (;; head = head->next) {
    bb->instructions.push_back(head);
    if (head == tail) {
      break;
    }
    if (BPF_CLASS(head->code) == BPF_JMP) {
      SANDBOX_DIE("Found a jump inside of a basic block");
    }
  }
  basic_blocks_.push_back(bb);
  return bb;
}

void CodeGen::AddBasicBlock(Instruction* head,
                            Instruction* tail,
                            const BranchTargets& branch_targets,
                            TargetsToBlocks* basic_blocks,
                            BasicBlock** firstBlock) {
  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
  // has not been set before. Consistency check: only the very first block
  // may lack incoming jumps, and "firstBlock" must be unset if and only if
  // no block has been emitted yet.
  BranchTargets::const_iterator iter = branch_targets.find(head);
  if ((iter == branch_targets.end()) != !*firstBlock ||
      !*firstBlock != basic_blocks->empty()) {
    SANDBOX_DIE(
        "Only the very first basic block should have no "
        "incoming jumps");
  }
  BasicBlock* bb = MakeBasicBlock(head, tail);
  if (!*firstBlock) {
    *firstBlock = bb;
  }
  (*basic_blocks)[head] = bb;
  return;
}

BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
    Instruction* instructions,
    const BranchTargets& branch_targets,
    TargetsToBlocks* basic_blocks) {
  // Textbook implementation of a basic block generator. All basic blocks
  // start with a branch target and end with either a return statement or
  // a jump (or are followed by an instruction that forms the beginning of a
  // new block). Both conditional and "always" jumps are supported.
  BasicBlock* first_block = NULL;
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  Instruction* tail = NULL;
  Instruction* head = instructions;
  for (Instruction* insn = head; insn;) {
    if (seen_instructions.find(insn) != seen_instructions.end()) {
      // We somehow went in a circle. This should never be possible. Not even
      // cyclic graphs are supposed to confuse us this much.
      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
    }
    seen_instructions.insert(insn);
    if (tail && branch_targets.find(insn) != branch_targets.end()) {
      // We reached a branch target. Start a new basic block (which means
      // flushing the previous basic block first).
      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
      head = insn;
    }
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // We reached a jump instruction; this completes our current basic
      // block. Flush it and continue by traversing both the true and the
      // false branch of the jump. We need to maintain a stack to do so.
      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
      if (BPF_OP(insn->code) != BPF_JA) {
        stack.push_back(insn->jf_ptr);
      }
      insn = insn->jt_ptr;

      // If we are jumping to an instruction that we have previously
      // processed, we are done with this branch. Continue by backtracking
      // up the stack.
      while (seen_instructions.find(insn) != seen_instructions.end()) {
      backtracking:
        if (stack.empty()) {
          // We successfully traversed all reachable instructions.
          return first_block;
        } else {
          // Going up the stack.
          insn = stack.back();
          stack.pop_back();
        }
      }
      // Starting a new basic block.
      tail = NULL;
      head = insn;
    } else {
      // We found a non-jumping instruction, append it to the current basic
      // block.
      tail = insn;
      insn = insn->next;
      if (!insn) {
        // We reached a return statement, flush the current basic block and
        // backtrack up the stack.
        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
        goto backtracking;
      }
    }
  }
  return first_block;
}
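
// Continuing the getpid example, the graph cuts into three basic blocks:
//
//   block A: LOAD nr; JEQ __NR_getpid   (terminated by the jump)
//   block B: RET SECCOMP_RET_ALLOW      (branch target)
//   block C: RET SECCOMP_RET_TRAP       (branch target)
//
// "head" and "check" end up in the same block because no instruction jumps
// to "check" directly.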

// We define a comparator that inspects the sequence of instructions in our
// basic block and any blocks referenced by this block. This function can be
// used in a "less" comparator for the purpose of storing pointers to basic
// blocks in STL containers; this gives an easy option to use STL to find
// shared tail sequences of basic blocks.
static int PointerCompare(const BasicBlock* block1,
                          const BasicBlock* block2,
                          const TargetsToBlocks& blocks) {
  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
  // If we are looking at the exact same block, this is trivial and we don't
  // need to do a full comparison.
  if (block1 == block2) {
    return 0;
  }

  // We compare the sequence of instructions in both basic blocks.
  const Instructions& insns1 = block1->instructions;
  const Instructions& insns2 = block2->instructions;
  // Basic blocks should never be empty.
  CHECK(!insns1.empty());
  CHECK(!insns2.empty());

  Instructions::const_iterator iter1 = insns1.begin();
  Instructions::const_iterator iter2 = insns2.begin();
  for (;; ++iter1, ++iter2) {
    // If we have reached the end of the sequence of instructions in one or
    // both basic blocks, we know the relative ordering between the two blocks
    // and can return.
    if (iter1 == insns1.end() || iter2 == insns2.end()) {
      if (iter1 != insns1.end()) {
        return 1;
      }
      if (iter2 != insns2.end()) {
        return -1;
      }

      // If the two blocks are the same length (and have elementwise-equal
      // code and k fields) and their last instructions are neither a JMP nor
      // a RET (which is the only way we can reach this point), then we must
      // compare their successors.
      Instruction* const insns1_last = insns1.back();
      Instruction* const insns2_last = insns2.back();
      CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP &&
            BPF_CLASS(insns1_last->code) != BPF_RET);

      // Non-jumping instructions will always have a valid next instruction.
      CHECK(insns1_last->next);
      CHECK(insns2_last->next);
      return PointerCompare(blocks.find(insns1_last->next)->second,
                            blocks.find(insns2_last->next)->second,
                            blocks);
    }

    // Compare the individual fields for both instructions.
    const Instruction& insn1 = **iter1;
    const Instruction& insn2 = **iter2;
    if (insn1.code != insn2.code) {
      return insn1.code - insn2.code;
    }
    if (insn1.k != insn2.k) {
      return insn1.k - insn2.k;
    }

    // Sanity check: If we're looking at a JMP or RET instruction, by
    // definition it should be the last instruction of the basic block.
    if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) {
      CHECK_EQ(insns1.back(), &insn1);
      CHECK_EQ(insns2.back(), &insn2);
    }

    // RET instructions terminate execution, and only JMP instructions use the
    // jt_ptr and jf_ptr fields. Anything else can continue to the next
    // instruction in the basic block.
    if (BPF_CLASS(insn1.code) == BPF_RET) {
      return 0;
    } else if (BPF_CLASS(insn1.code) != BPF_JMP) {
      continue;
    }

    // Recursively compare the "true" and "false" branches.
    // A well-formed BPF program can't have any cycles, so we know
    // that our recursive algorithm will ultimately terminate.
    // In the unlikely event that the programmer made a mistake and
    // went out of the way to give us a cyclic program, we will crash
    // with a stack overflow. We are OK with that.
    if (BPF_OP(insn1.code) != BPF_JA) {
      int c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
                             blocks.find(insn2.jf_ptr)->second,
                             blocks);
      if (c != 0) {
        return c;
      }
    }
    return PointerCompare(blocks.find(insn1.jt_ptr)->second,
                          blocks.find(insn2.jt_ptr)->second,
                          blocks);
  }
}
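
// As an example of the intended semantics (hypothetical blocks): two distinct
// BasicBlock objects that each consist of the single instruction
// "RET SECCOMP_RET_TRAP" compare as equal (PointerCompare() returns 0), and
// so do two longer blocks whose instruction sequences match elementwise and
// whose successor blocks in turn compare as equal. This equality is exactly
// what MergeTails() below exploits; blocks differing in any code or k field,
// or in any transitive successor, compare as non-equal.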

void CodeGen::MergeTails(TargetsToBlocks* blocks) {
  // We enter all of our basic blocks into a set using the BasicBlock::Less()
  // comparator. This naturally results in blocks with identical tails of
  // instructions mapping to the same entry in the set. Whenever we discover
  // that a particular chain of instructions is already in the set, we merge
  // the basic blocks and update the pointer in the "blocks" map.
  // N.B. We don't merge instructions on a granularity that is finer than
  // a basic block. In practice, this is sufficiently rare that we don't
  // incur a big cost.
  // Similarly, we currently don't merge anything other than tails. In
  // the future, we might decide to revisit this decision and attempt to
  // merge arbitrary sub-sequences of instructions.
  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
  Set seen_basic_blocks(less);
  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
       ++iter) {
    BasicBlock* bb = iter->second;
    Set::const_iterator entry = seen_basic_blocks.find(bb);
    if (entry == seen_basic_blocks.end()) {
      // This is the first time we see this particular sequence of
      // instructions. Enter the basic block into the set of known
      // basic blocks.
      seen_basic_blocks.insert(bb);
    } else {
      // We have previously seen another basic block that defines the same
      // sequence of instructions. Merge the two blocks and update the
      // pointer in the "blocks" map.
      iter->second = *entry;
    }
  }
}
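
// For instance (hypothetical policy), a filter that returns SECCOMP_RET_TRAP
// for each of a dozen disallowed system calls initially ends in a dozen
// structurally identical "RET SECCOMP_RET_TRAP" blocks. After MergeTails(),
// every entry in "blocks" that used to point at one of the duplicates points
// at a single shared block, so the final program contains that RET only once
// and all the conditional jumps funnel into it.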

void CodeGen::ComputeIncomingBranches(BasicBlock* block,
                                      const TargetsToBlocks& targets_to_blocks,
                                      IncomingBranches* incoming_branches) {
  // We increment the number of incoming branches each time we encounter a
  // basic block. But we only traverse recursively the very first time we
  // encounter a new block. This is necessary to make topological sorting
  // work correctly.
  if (++(*incoming_branches)[block] == 1) {
    Instruction* last_insn = block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
                              targets_to_blocks,
                              incoming_branches);
      if (BPF_OP(last_insn->code) != BPF_JA) {
        ComputeIncomingBranches(
            targets_to_blocks.find(last_insn->jf_ptr)->second,
            targets_to_blocks,
            incoming_branches);
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
                              targets_to_blocks,
                              incoming_branches);
    }
  }
}
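
// In the getpid example, the resulting counts are: block A gets 1 (from the
// initial visit of the first block), and blocks B and C get 1 each (from the
// two branches of the conditional jump). Had MergeTails() folded several RET
// blocks into one, the surviving block would instead be counted once per
// incoming branch.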

void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
                                  const TargetsToBlocks& blocks,
                                  BasicBlocks* basic_blocks) {
  // Textbook implementation of a toposort. We keep looking for basic blocks
  // that don't have any incoming branches (initially, this is just the
  // "first_block") and add them to the topologically sorted list of
  // "basic_blocks". As we do so, we remove outgoing branches. This
  // potentially ends up making our descendants eligible for the sorted list.
  // The sorting algorithm terminates when there are no more basic blocks
  // that have no incoming branches. If we didn't move all blocks from the
  // set of "unordered_blocks" to the sorted list of "basic_blocks", there
  // must have been a cyclic dependency. This should never happen in a BPF
  // program, as well-formed BPF programs only ever have forward branches.
  IncomingBranches unordered_blocks;
  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);

  std::set<BasicBlock*> heads;
  for (;;) {
    // Move the block from "unordered_blocks" to "basic_blocks".
    basic_blocks->push_back(first_block);

    // Inspect the last instruction in the basic block. This is typically
    // either a jump or a return statement. But it could also be a "normal"
    // instruction that is followed by a jump target.
    Instruction* last_insn = first_block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      // Remove outgoing branches. This might end up moving our descendants
      // into the set of "head" nodes that no longer have any incoming
      // branches.
      TargetsToBlocks::const_iterator iter;
      if (BPF_OP(last_insn->code) != BPF_JA) {
        iter = blocks.find(last_insn->jf_ptr);
        if (!--unordered_blocks[iter->second]) {
          heads.insert(iter->second);
        }
      }
      iter = blocks.find(last_insn->jt_ptr);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      // We encountered an instruction that doesn't change code flow. Try to
      // pick the next "first_block" from "last_insn->next", if possible.
      TargetsToBlocks::const_iterator iter;
      iter = blocks.find(last_insn->next);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      } else {
        // Our basic block is supposed to be followed by "last_insn->next",
        // but dependencies prevent this from happening. Insert a BPF_JA
        // instruction to correct the code flow.
        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
        first_block->instructions.push_back(ja);
        last_insn->next = ja;
      }
    }
    if (heads.empty()) {
      if (unordered_blocks.size() != basic_blocks->size()) {
        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
      }
      return;
    }
    // Proceed by picking an arbitrary node from the set of basic blocks that
    // do not have any incoming branches.
    first_block = *heads.begin();
    heads.erase(heads.begin());
  }
}
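
// For the getpid example, the sorted order comes out as [A, B, C]: after A is
// emitted, the "false" target C moves into "heads" while the "true" target B
// becomes the next "first_block" directly. The loop deliberately prefers the
// "true" target when its incoming count drops to zero, which keeps
// fall-through paths contiguous and reduces the number of BPF_JA
// instructions that need to be inserted.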

void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
                                   const TargetsToBlocks& targets_to_blocks) {
  // While we previously used pointers in jt_ptr and jf_ptr to link jump
  // instructions to their targets, we now convert these jumps to relative
  // jumps that are suitable for loading the BPF program into the kernel.
  int offset = 0;

  // Since we just completed a toposort, all jump targets are guaranteed to
  // go forward. This means that iterating over the basic blocks in reverse
  // makes it trivial to compute the correct offsets.
  BasicBlock* bb = NULL;
  BasicBlock* last_bb = NULL;
  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
       iter != basic_blocks->rend();
       ++iter) {
    last_bb = bb;
    bb = *iter;
    Instruction* insn = bb->instructions.back();
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // The basic block ended in a jump instruction. We can now compute the
      // appropriate offsets.
      if (BPF_OP(insn->code) == BPF_JA) {
        // "Always" jumps use the 32-bit "k" field for the offset, instead
        // of the 8-bit "jt" and "jf" fields.
        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        insn->k = jmp;
        insn->jt = insn->jf = 0;
      } else {
        // The offset computations for conditional jumps are just the same
        // as for "always" jumps.
        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;

        // There is an added complication, because conditional relative jumps
        // can only jump at most 255 instructions forward. If we have to jump
        // further, insert an extra "always" jump.
        Instructions::size_type jmp = bb->instructions.size();
        if (jt > 255 || (jt == 255 && jf > 255)) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
          bb->instructions.push_back(ja);
          ja->k = jt;
          ja->jt = ja->jf = 0;

          // The newly inserted "always" jump, of course, requires us to
          // adjust the jump targets in the original conditional jump.
          jt = 0;
          ++jf;
        }
        if (jf > 255) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
          ja->k = jf;
          ja->jt = ja->jf = 0;

          // Again, we have to adjust the jump targets in the original
          // conditional jump.
          ++jt;
          jf = 0;
        }

        // Now we can finally set the relative jump targets in the
        // conditional jump instruction. Afterwards, we must no longer access
        // the jt_ptr and jf_ptr fields.
        insn->jt = jt;
        insn->jf = jf;
      }
    } else if (BPF_CLASS(insn->code) != BPF_RET &&
               targets_to_blocks.find(insn->next)->second != last_bb) {
      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
    }

    // Proceed to the next basic block.
    offset += bb->instructions.size();
    bb->offset = offset;
  }
  return;
}
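
// Worked example for the getpid filter: iterating in reverse over the sorted
// blocks [A(2 insns), B(1), C(1)] first assigns C->offset = 1, then
// B->offset = 2 (offsets count instructions from the end of the program).
// When A's conditional jump is processed, "offset" is still 2, so
// jt = 2 - B->offset = 0 (fall through to the next instruction) and
// jf = 2 - C->offset = 1 (skip one instruction), matching the kernel's
// semantics where jt/jf hold the number of instructions to skip. The BPF_JA
// insertion above only kicks in when such a skip would exceed the 8-bit
// range of the jt/jf fields.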

void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
                                     Program* program) {
  // Our basic blocks have been sorted and relative jump offsets have been
  // computed. The last remaining step is for all the instructions in our
  // basic blocks to be concatenated into a BPF program.
  program->clear();
  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
       bb_iter != basic_blocks.end();
       ++bb_iter) {
    const BasicBlock& bb = **bb_iter;
    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
         insn_iter != bb.instructions.end();
         ++insn_iter) {
      const Instruction& insn = **insn_iter;
      program->push_back(
          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
    }
  }
  return;
}

void CodeGen::Compile(Instruction* instructions, Program* program) {
  if (compiled_) {
    SANDBOX_DIE(
        "Cannot call Compile() multiple times. Create a new code "
        "generator instead");
  }
  compiled_ = true;

  BranchTargets branch_targets;
  FindBranchTargets(*instructions, &branch_targets);
  TargetsToBlocks all_blocks;
  BasicBlock* first_block =
      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
  MergeTails(&all_blocks);
  BasicBlocks basic_blocks;
  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
  ComputeRelativeJumps(&basic_blocks, all_blocks);
  ConcatenateBasicBlocks(basic_blocks, program);
  return;
}
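
// End-to-end usage sketch (hypothetical caller; in the actual sandbox the
// policy compiler drives this): build an instruction graph with
// MakeInstruction(), compile it, and hand the result to the kernel:
//
//   CodeGen gen;
//   Instruction* head = ...;  // e.g. the getpid filter sketched earlier
//   Program program;
//   gen.Compile(head, &program);
//   // "program" now holds the raw sock_filter entries, ready to be wrapped
//   // in a sock_fprog and installed via
//   // prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, ...).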

}  // namespace sandbox