| Index: sandbox/linux/seccomp-bpf/codegen.cc
|
| diff --git a/sandbox/linux/seccomp-bpf/codegen.cc b/sandbox/linux/seccomp-bpf/codegen.cc
|
| index 18d26daa0e9897daa3f5f6386b190dc0e94f790a..3ed06cb4013a68f832a808b26e2d398ef78cab5c 100644
|
| --- a/sandbox/linux/seccomp-bpf/codegen.cc
|
| +++ b/sandbox/linux/seccomp-bpf/codegen.cc
|
| @@ -6,590 +6,148 @@
|
|
|
| #include <linux/filter.h>
|
|
|
| -#include <set>
|
| +#include <limits>
|
| +#include <utility>
|
|
|
| #include "base/logging.h"
|
| -#include "sandbox/linux/seccomp-bpf/basicblock.h"
|
| -#include "sandbox/linux/seccomp-bpf/die.h"
|
| -#include "sandbox/linux/seccomp-bpf/instruction.h"
|
| +
|
| +// This CodeGen implementation strives for simplicity while still
|
| +// generating acceptable BPF programs under typical usage patterns
|
| +// (e.g., by PolicyCompiler).
|
| +//
|
| +// The key to its simplicity is that BPF programs only support forward
|
| +// jumps/branches, which allows constraining the DAG construction API
|
| +// to make instruction nodes immutable. Immutable nodes admit a
|
| +// simple greedy approach of emitting new instructions as needed and
|
| +// then reusing existing ones that have already been emitted. This
|
| +// cleanly avoids any need to compute basic blocks or apply
|
| +// topological sorting because the API effectively sorts instructions
|
| +// for us (e.g., before MakeInstruction() can be called to emit a
|
| +// branch instruction, it must have already been called for each
|
| +// branch path).
|
| +//
|
| +// This greedy algorithm is not without (theoretical) weaknesses, though:
|
| +//
|
| +// 1. In the general case, we don't eliminate dead code. If needed,
|
| +// we could trace back through the program in Compile() and elide
|
| +// any unneeded instructions, but in practice we only emit live
|
| +// instructions anyway.
|
| +//
|
| +// 2. By not dividing instructions into basic blocks and sorting, we
|
| +// lose an opportunity to move non-branch/non-return instructions
|
| +// adjacent to their successor instructions, which means we might
|
| +// need to emit additional jumps. But in practice, they'll
|
| +// already be nearby as long as callers don't go out of their way
|
| +// to interleave MakeInstruction() calls for unrelated code
|
| +// sequences.
|
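| +//
|
| +// For illustration only, a caller might assemble a tiny filter along
|
| +// these lines (a sketch, not code from this CL; the real client is
|
| +// PolicyCompiler, and the sketch assumes the kNullNode default
|
| +// arguments for jt/jf declared in codegen.h):
|
| +//
|
| +//   CodeGen gen;
|
| +//   CodeGen::Node deny = gen.MakeInstruction(BPF_RET + BPF_K, 0);
|
| +//   CodeGen::Node allow = gen.MakeInstruction(BPF_RET + BPF_K, 1);
|
| +//   CodeGen::Node check = gen.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K,
|
| +//                                             42, allow, deny);
|
| +//   CodeGen::Node head = gen.MakeInstruction(
|
| +//       BPF_LD + BPF_W + BPF_ABS, 0 /* offsetof(seccomp_data, nr) */, check);
|
| +//   CodeGen::Program program;
|
| +//   gen.Compile(head, &program);
|
| +//
|
| +// Note how |allow| and |deny| exist before the branch that refers to
|
| +// them, and |check| exists before |head|; that ordering is exactly
|
| +// what lets the greedy emission described above work without a
|
| +// separate topological sort.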
|
|
| namespace sandbox {
|
|
|
| -// Unfortunately this needs to be defined out-of-line because inline
|
| -// initializing a static member to "nullptr" requires "constexpr",
|
| -// which is currently banned by the Chromium style guide.
|
| -const CodeGen::Node CodeGen::kNullNode = nullptr;
|
| +// kBranchRange is the maximum value that can be stored in
|
| +// sock_filter's 8-bit jt and jf fields.
|
| +const size_t kBranchRange = std::numeric_limits<uint8_t>::max();
|
| +
|
| +const CodeGen::Node CodeGen::kNullNode;
|
|
|
| -CodeGen::CodeGen() : compiled_(false) {}
|
| +CodeGen::CodeGen() : program_(), memos_() {
|
| +}
|
|
|
| CodeGen::~CodeGen() {
|
| - for (Instructions::iterator iter = instructions_.begin();
|
| - iter != instructions_.end();
|
| - ++iter) {
|
| - delete *iter;
|
| - }
|
| - for (BasicBlocks::iterator iter = basic_blocks_.begin();
|
| - iter != basic_blocks_.end();
|
| - ++iter) {
|
| - delete *iter;
|
| - }
|
| +}
|
| +
|
| +void CodeGen::Compile(CodeGen::Node head, Program* out) {
|
| + DCHECK(out);
|
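| + // |program_| stores instructions in reverse execution order (the
|
| + // first node created becomes the program's final instruction), so
|
| + // copy it out back-to-front starting at |head|.  Nodes created
|
| + // after |head| fall outside the copied range and are dropped.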
| + out->assign(program_.rbegin() + Offset(head), program_.rend());
|
| }
|
|
|
| CodeGen::Node CodeGen::MakeInstruction(uint16_t code,
|
| uint32_t k,
|
| Node jt,
|
| Node jf) {
|
| - Node insn;
|
| - if (BPF_CLASS(code) == BPF_JMP) {
|
| - CHECK_NE(kNullNode, jt);
|
| - if (BPF_OP(code) == BPF_JA) {
|
| - CHECK_EQ(kNullNode, jf);
|
| - } else {
|
| - CHECK_NE(kNullNode, jf);
|
| - }
|
| - insn = new Instruction(code, k, jt, jf);
|
| - } else {
|
| - if (BPF_CLASS(code) == BPF_RET) {
|
| - CHECK_EQ(kNullNode, jt);
|
| - } else {
|
| - CHECK_NE(kNullNode, jt);
|
| - }
|
| - CHECK_EQ(kNullNode, jf);
|
| - insn = new Instruction(code, k, jt);
|
| - }
|
| - instructions_.push_back(insn);
|
| - return insn;
|
| + // To avoid generating redundant code sequences, we memoize the
|
| + // results from AppendInstruction().
|
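| + // E.g., two calls with identical arguments (a common pattern when
|
| + // many policy rules share the same return node) yield the same
|
| + // Node and emit only one instruction.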
| + auto res = memos_.insert(std::make_pair(MemoKey(code, k, jt, jf), kNullNode));
|
| + CodeGen::Node* node = &res.first->second;
|
| + if (res.second) { // Newly inserted memo entry.
|
| + *node = AppendInstruction(code, k, jt, jf);
|
| + }
|
| + return *node;
|
| }
|
|
|
| -void CodeGen::FindBranchTargets(const Instruction& instructions,
|
| - BranchTargets* branch_targets) {
|
| - // Follow all possible paths through the "instructions" graph and compute
|
| - // a list of branch targets. This will later be needed to compute the
|
| - // boundaries of basic blocks.
|
| - // We maintain a set of all instructions that we have previously seen. This
|
| - // set ultimately converges on all instructions in the program.
|
| - std::set<const Instruction*> seen_instructions;
|
| - Instructions stack;
|
| - for (const Instruction* insn = &instructions; insn;) {
|
| - seen_instructions.insert(insn);
|
| - if (BPF_CLASS(insn->code) == BPF_JMP) {
|
| - // Found a jump. Increase count of incoming edges for each of the jump
|
| - // targets.
|
| - ++(*branch_targets)[insn->jt_ptr];
|
| - if (BPF_OP(insn->code) != BPF_JA) {
|
| - ++(*branch_targets)[insn->jf_ptr];
|
| - stack.push_back(const_cast<Instruction*>(insn));
|
| - }
|
| - // Start a recursive decent for depth-first traversal.
|
| - if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
|
| - // We haven't seen the "true" branch yet. Traverse it now. We have
|
| - // already remembered the "false" branch on the stack and will
|
| - // traverse it later.
|
| - insn = insn->jt_ptr;
|
| - continue;
|
| - } else {
|
| - // Now try traversing the "false" branch.
|
| - insn = NULL;
|
| - }
|
| - } else {
|
| - // This is a non-jump instruction, just continue to the next instruction
|
| - // (if any). It's OK if "insn" becomes NULL when reaching a return
|
| - // instruction.
|
| - if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
|
| - SANDBOX_DIE(
|
| - "Internal compiler error; return instruction must be at "
|
| - "the end of the BPF program");
|
| - }
|
| - if (seen_instructions.find(insn->next) == seen_instructions.end()) {
|
| - insn = insn->next;
|
| - } else {
|
| - // We have seen this instruction before. That could happen if it is
|
| - // a branch target. No need to continue processing.
|
| - insn = NULL;
|
| - }
|
| - }
|
| - while (!insn && !stack.empty()) {
|
| - // We are done processing all the way to a leaf node, backtrack up the
|
| - // stack to any branches that we haven't processed yet. By definition,
|
| - // this has to be a "false" branch, as we always process the "true"
|
| - // branches right away.
|
| - insn = stack.back();
|
| - stack.pop_back();
|
| - if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
|
| - // We haven't seen the "false" branch yet. So, that's where we'll
|
| - // go now.
|
| - insn = insn->jf_ptr;
|
| - } else {
|
| - // We have seen both the "true" and the "false" branch, continue
|
| - // up the stack.
|
| - if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
|
| - SANDBOX_DIE(
|
| - "Internal compiler error; cannot find all "
|
| - "branch targets");
|
| - }
|
| - insn = NULL;
|
| - }
|
| - }
|
| - }
|
| - return;
|
| -}
|
| -
|
| -BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
|
| - // Iterate over all the instructions between "head" and "tail" and
|
| - // insert them into a new basic block.
|
| - BasicBlock* bb = new BasicBlock;
|
| - for (;; head = head->next) {
|
| - bb->instructions.push_back(head);
|
| - if (head == tail) {
|
| - break;
|
| - }
|
| - if (BPF_CLASS(head->code) == BPF_JMP) {
|
| - SANDBOX_DIE("Found a jump inside of a basic block");
|
| - }
|
| - }
|
| - basic_blocks_.push_back(bb);
|
| - return bb;
|
| -}
|
| +CodeGen::Node CodeGen::AppendInstruction(uint16_t code,
|
| + uint32_t k,
|
| + Node jt,
|
| + Node jf) {
|
| + if (BPF_CLASS(code) == BPF_JMP) {
|
| + CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed";
|
|
|
| -void CodeGen::AddBasicBlock(Instruction* head,
|
| - Instruction* tail,
|
| - const BranchTargets& branch_targets,
|
| - TargetsToBlocks* basic_blocks,
|
| - BasicBlock** firstBlock) {
|
| - // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
|
| - // has not been set before.
|
| - BranchTargets::const_iterator iter = branch_targets.find(head);
|
| - if ((iter == branch_targets.end()) != !*firstBlock ||
|
| - !*firstBlock != basic_blocks->empty()) {
|
| - SANDBOX_DIE(
|
| - "Only the very first basic block should have no "
|
| - "incoming jumps");
|
| - }
|
| - BasicBlock* bb = MakeBasicBlock(head, tail);
|
| - if (!*firstBlock) {
|
| - *firstBlock = bb;
|
| + // We need to check |jt| twice because it might get pushed
|
| + // out-of-range by appending a jump for |jf|.
|
| + jt = WithinRange(jt, kBranchRange);
|
| + jf = WithinRange(jf, kBranchRange);
|
| + jt = WithinRange(jt, kBranchRange);
|
| + return Append(code, k, Offset(jt), Offset(jf));
|
| }
|
| - (*basic_blocks)[head] = bb;
|
| - return;
|
| -}
|
|
|
| -BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
|
| - Instruction* instructions,
|
| - const BranchTargets& branch_targets,
|
| - TargetsToBlocks* basic_blocks) {
|
| - // Textbook implementation of a basic block generator. All basic blocks
|
| - // start with a branch target and end with either a return statement or
|
| - // a jump (or are followed by an instruction that forms the beginning of a
|
| - // new block). Both conditional and "always" jumps are supported.
|
| - BasicBlock* first_block = NULL;
|
| - std::set<const Instruction*> seen_instructions;
|
| - Instructions stack;
|
| - Instruction* tail = NULL;
|
| - Instruction* head = instructions;
|
| - for (Instruction* insn = head; insn;) {
|
| - if (seen_instructions.find(insn) != seen_instructions.end()) {
|
| - // We somehow went in a circle. This should never be possible. Not even
|
| - // cyclic graphs are supposed to confuse us this much.
|
| - SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
|
| - }
|
| - seen_instructions.insert(insn);
|
| - if (tail && branch_targets.find(insn) != branch_targets.end()) {
|
| - // We reached a branch target. Start a new basic block (this means,
|
| - // flushing the previous basic block first).
|
| - AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
|
| - head = insn;
|
| - }
|
| - if (BPF_CLASS(insn->code) == BPF_JMP) {
|
| - // We reached a jump instruction, this completes our current basic
|
| - // block. Flush it and continue by traversing both the true and the
|
| - // false branch of the jump. We need to maintain a stack to do so.
|
| - AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
|
| - if (BPF_OP(insn->code) != BPF_JA) {
|
| - stack.push_back(insn->jf_ptr);
|
| - }
|
| - insn = insn->jt_ptr;
|
| -
|
| - // If we are jumping to an instruction that we have previously
|
| - // processed, we are done with this branch. Continue by backtracking
|
| - // up the stack.
|
| - while (seen_instructions.find(insn) != seen_instructions.end()) {
|
| - backtracking:
|
| - if (stack.empty()) {
|
| - // We successfully traversed all reachable instructions.
|
| - return first_block;
|
| - } else {
|
| - // Going up the stack.
|
| - insn = stack.back();
|
| - stack.pop_back();
|
| - }
|
| - }
|
| - // Starting a new basic block.
|
| - tail = NULL;
|
| - head = insn;
|
| - } else {
|
| - // We found a non-jumping instruction, append it to current basic
|
| - // block.
|
| - tail = insn;
|
| - insn = insn->next;
|
| - if (!insn) {
|
| - // We reached a return statement, flush the current basic block and
|
| - // backtrack up the stack.
|
| - AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
|
| - goto backtracking;
|
| - }
|
| - }
|
| + CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf";
|
| + if (BPF_CLASS(code) == BPF_RET) {
|
| + CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt";
|
| + } else {
|
| + // For non-branch/non-return instructions, execution always
|
| + // proceeds to the next instruction; so we need to arrange for
|
| + // that to be |jt|.
|
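| + // WithinRange(jt, 0) makes |jt| the most recently emitted
|
| + // instruction (emitting a JA to the original target if it wasn't
|
| + // already adjacent), so the instruction appended below falls
|
| + // through directly into it.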
| + jt = WithinRange(jt, 0);
|
| }
|
| - return first_block;
|
| + return Append(code, k, 0, 0);
|
| }
|
|
|
| -// We define a comparator that inspects the sequence of instructions in our
|
| -// basic block and any blocks referenced by this block. This function can be
|
| -// used in a "less" comparator for the purpose of storing pointers to basic
|
| -// blocks in STL containers; this gives an easy option to use STL to find
|
| -// shared tail sequences of basic blocks.
|
| -static int PointerCompare(const BasicBlock* block1,
|
| - const BasicBlock* block2,
|
| - const TargetsToBlocks& blocks) {
|
| - // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
|
| - // If we are looking at the exact same block, this is trivial and we don't
|
| - // need to do a full comparison.
|
| - if (block1 == block2) {
|
| - return 0;
|
| - }
|
| -
|
| - // We compare the sequence of instructions in both basic blocks.
|
| - const Instructions& insns1 = block1->instructions;
|
| - const Instructions& insns2 = block2->instructions;
|
| - // Basic blocks should never be empty.
|
| - CHECK(!insns1.empty());
|
| - CHECK(!insns2.empty());
|
| -
|
| - Instructions::const_iterator iter1 = insns1.begin();
|
| - Instructions::const_iterator iter2 = insns2.begin();
|
| - for (;; ++iter1, ++iter2) {
|
| - // If we have reached the end of the sequence of instructions in one or
|
| - // both basic blocks, we know the relative ordering between the two blocks
|
| - // and can return.
|
| - if (iter1 == insns1.end() || iter2 == insns2.end()) {
|
| - if (iter1 != insns1.end()) {
|
| - return 1;
|
| - }
|
| - if (iter2 != insns2.end()) {
|
| - return -1;
|
| - }
|
| +CodeGen::Node CodeGen::WithinRange(Node target, size_t range) {
|
| + if (Offset(target) > range) {
|
| + // TODO(mdempsky): If |range > 0|, we might be able to reuse an
|
| + // existing instruction within that range.
|
|
|
| - // If the two blocks are the same length (and have elementwise-equal code
|
| - // and k fields) and their last instructions are neither a JMP nor a RET
|
| - // (which is the only way we can reach this point), then we must compare
|
| - // their successors.
|
| - Instruction* const insns1_last = insns1.back();
|
| - Instruction* const insns2_last = insns2.back();
|
| - CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP &&
|
| - BPF_CLASS(insns1_last->code) != BPF_RET);
|
| + // TODO(mdempsky): If |target| is a branch or return, it might be
|
| + // possible to duplicate that instruction rather than jump to it.
|
|
|
| - // Non jumping instructions will always have a valid next instruction.
|
| - CHECK(insns1_last->next);
|
| - CHECK(insns2_last->next);
|
| - return PointerCompare(blocks.find(insns1_last->next)->second,
|
| - blocks.find(insns2_last->next)->second,
|
| - blocks);
|
| - }
|
| -
|
| - // Compare the individual fields for both instructions.
|
| - const Instruction& insn1 = **iter1;
|
| - const Instruction& insn2 = **iter2;
|
| - if (insn1.code != insn2.code) {
|
| - return insn1.code - insn2.code;
|
| - }
|
| - if (insn1.k != insn2.k) {
|
| - return insn1.k - insn2.k;
|
| - }
|
| -
|
| - // Sanity check: If we're looking at a JMP or RET instruction, by definition
|
| - // it should be the last instruction of the basic block.
|
| - if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) {
|
| - CHECK_EQ(insns1.back(), &insn1);
|
| - CHECK_EQ(insns2.back(), &insn2);
|
| - }
|
| -
|
| - // RET instructions terminate execution, and only JMP instructions use the
|
| - // jt_ptr and jf_ptr fields. Anything else can continue to the next
|
| - // instruction in the basic block.
|
| - if (BPF_CLASS(insn1.code) == BPF_RET) {
|
| - return 0;
|
| - } else if (BPF_CLASS(insn1.code) != BPF_JMP) {
|
| - continue;
|
| - }
|
| -
|
| - // Recursively compare the "true" and "false" branches.
|
| - // A well-formed BPF program can't have any cycles, so we know
|
| - // that our recursive algorithm will ultimately terminate.
|
| - // In the unlikely event that the programmer made a mistake and
|
| - // went out of the way to give us a cyclic program, we will crash
|
| - // with a stack overflow. We are OK with that.
|
| - if (BPF_OP(insn1.code) != BPF_JA) {
|
| - int c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
|
| - blocks.find(insn2.jf_ptr)->second,
|
| - blocks);
|
| - if (c != 0) {
|
| - return c;
|
| - }
|
| - }
|
| - return PointerCompare(blocks.find(insn1.jt_ptr)->second,
|
| - blocks.find(insn2.jt_ptr)->second,
|
| - blocks);
|
| + // Fall back to emitting a jump instruction.
|
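| + // |Offset(target)| is evaluated before the append, so the JA's k
|
| + // field is the exact number of instructions to skip to reach
|
| + // |target|; the JA itself then becomes the newest instruction
|
| + // (offset 0), which satisfies any |range|.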
| + target = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0);
|
| }
|
| -}
|
|
|
| -void CodeGen::MergeTails(TargetsToBlocks* blocks) {
|
| - // We enter all of our basic blocks into a set using the BasicBlock::Less()
|
| - // comparator. This naturally results in blocks with identical tails of
|
| - // instructions to map to the same entry in the set. Whenever we discover
|
| - // that a particular chain of instructions is already in the set, we merge
|
| - // the basic blocks and update the pointer in the "blocks" map.
|
| - // Returns the number of unique basic blocks.
|
| - // N.B. We don't merge instructions on a granularity that is finer than
|
| - // a basic block. In practice, this is sufficiently rare that we don't
|
| - // incur a big cost.
|
| - // Similarly, we currently don't merge anything other than tails. In
|
| - // the future, we might decide to revisit this decision and attempt to
|
| - // merge arbitrary sub-sequences of instructions.
|
| - BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
|
| - typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
|
| - Set seen_basic_blocks(less);
|
| - for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
|
| - ++iter) {
|
| - BasicBlock* bb = iter->second;
|
| - Set::const_iterator entry = seen_basic_blocks.find(bb);
|
| - if (entry == seen_basic_blocks.end()) {
|
| - // This is the first time we see this particular sequence of
|
| - // instructions. Enter the basic block into the set of known
|
| - // basic blocks.
|
| - seen_basic_blocks.insert(bb);
|
| - } else {
|
| - // We have previously seen another basic block that defines the same
|
| - // sequence of instructions. Merge the two blocks and update the
|
| - // pointer in the "blocks" map.
|
| - iter->second = *entry;
|
| - }
|
| - }
|
| + CHECK_LE(Offset(target), range) << "ICE: Failed to bring target within range";
|
| + return target;
|
| }
|
|
|
| -void CodeGen::ComputeIncomingBranches(BasicBlock* block,
|
| - const TargetsToBlocks& targets_to_blocks,
|
| - IncomingBranches* incoming_branches) {
|
| - // We increment the number of incoming branches each time we encounter a
|
| - // basic block. But we only traverse recursively the very first time we
|
| - // encounter a new block. This is necessary to make topological sorting
|
| - // work correctly.
|
| - if (++(*incoming_branches)[block] == 1) {
|
| - Instruction* last_insn = block->instructions.back();
|
| - if (BPF_CLASS(last_insn->code) == BPF_JMP) {
|
| - ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
|
| - targets_to_blocks,
|
| - incoming_branches);
|
| - if (BPF_OP(last_insn->code) != BPF_JA) {
|
| - ComputeIncomingBranches(
|
| - targets_to_blocks.find(last_insn->jf_ptr)->second,
|
| - targets_to_blocks,
|
| - incoming_branches);
|
| - }
|
| - } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
|
| - ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
|
| - targets_to_blocks,
|
| - incoming_branches);
|
| - }
|
| +CodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) {
|
| + if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
|
| + CHECK_LE(jt, kBranchRange);
|
| + CHECK_LE(jf, kBranchRange);
|
| + } else {
|
| + CHECK_EQ(0U, jt);
|
| + CHECK_EQ(0U, jf);
|
| }
|
| -}
|
| -
|
| -void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
|
| - const TargetsToBlocks& blocks,
|
| - BasicBlocks* basic_blocks) {
|
| - // Textbook implementation of a toposort. We keep looking for basic blocks
|
| - // that don't have any incoming branches (initially, this is just the
|
| - // "first_block") and add them to the topologically sorted list of
|
| - // "basic_blocks". As we do so, we remove outgoing branches. This potentially
|
| - // ends up making our descendants eligible for the sorted list. The
|
| - // sorting algorithm terminates when there are no more basic blocks that have
|
| - // no incoming branches. If we didn't move all blocks from the set of
|
| - // "unordered_blocks" to the sorted list of "basic_blocks", there must have
|
| - // been a cyclic dependency. This should never happen in a BPF program, as
|
| - // well-formed BPF programs only ever have forward branches.
|
| - IncomingBranches unordered_blocks;
|
| - ComputeIncomingBranches(first_block, blocks, &unordered_blocks);
|
|
|
| - std::set<BasicBlock*> heads;
|
| - for (;;) {
|
| - // Move block from "unordered_blocks" to "basic_blocks".
|
| - basic_blocks->push_back(first_block);
|
| -
|
| - // Inspect last instruction in the basic block. This is typically either a
|
| - // jump or a return statement. But it could also be a "normal" instruction
|
| - // that is followed by a jump target.
|
| - Instruction* last_insn = first_block->instructions.back();
|
| - if (BPF_CLASS(last_insn->code) == BPF_JMP) {
|
| - // Remove outgoing branches. This might end up moving our descendants
|
| - // into set of "head" nodes that no longer have any incoming branches.
|
| - TargetsToBlocks::const_iterator iter;
|
| - if (BPF_OP(last_insn->code) != BPF_JA) {
|
| - iter = blocks.find(last_insn->jf_ptr);
|
| - if (!--unordered_blocks[iter->second]) {
|
| - heads.insert(iter->second);
|
| - }
|
| - }
|
| - iter = blocks.find(last_insn->jt_ptr);
|
| - if (!--unordered_blocks[iter->second]) {
|
| - first_block = iter->second;
|
| - continue;
|
| - }
|
| - } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
|
| - // We encountered an instruction that doesn't change code flow. Try to
|
| - // pick the next "first_block" from "last_insn->next", if possible.
|
| - TargetsToBlocks::const_iterator iter;
|
| - iter = blocks.find(last_insn->next);
|
| - if (!--unordered_blocks[iter->second]) {
|
| - first_block = iter->second;
|
| - continue;
|
| - } else {
|
| - // Our basic block is supposed to be followed by "last_insn->next",
|
| - // but dependencies prevent this from happening. Insert a BPF_JA
|
| - // instruction to correct the code flow.
|
| - Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
|
| - first_block->instructions.push_back(ja);
|
| - last_insn->next = ja;
|
| - }
|
| - }
|
| - if (heads.empty()) {
|
| - if (unordered_blocks.size() != basic_blocks->size()) {
|
| - SANDBOX_DIE("Internal compiler error; cyclic graph detected");
|
| - }
|
| - return;
|
| - }
|
| - // Proceed by picking an arbitrary node from the set of basic blocks that
|
| - // do not have any incoming branches.
|
| - first_block = *heads.begin();
|
| - heads.erase(heads.begin());
|
| - }
|
| + CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS));
|
| + program_.push_back(sock_filter{code, static_cast<uint8_t>(jt),
|
| + static_cast<uint8_t>(jf), k});
|
| + return program_.size() - 1;
|
| }
|
|
|
| -void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
|
| - const TargetsToBlocks& targets_to_blocks) {
|
| - // While we previously used pointers in jt_ptr and jf_ptr to link jump
|
| - // instructions to their targets, we now convert these jumps to relative
|
| - // jumps that are suitable for loading the BPF program into the kernel.
|
| - int offset = 0;
|
| -
|
| - // Since we just completed a toposort, all jump targets are guaranteed to
|
| - // go forward. This means, iterating over the basic blocks in reverse makes
|
| - // it trivial to compute the correct offsets.
|
| - BasicBlock* bb = NULL;
|
| - BasicBlock* last_bb = NULL;
|
| - for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
|
| - iter != basic_blocks->rend();
|
| - ++iter) {
|
| - last_bb = bb;
|
| - bb = *iter;
|
| - Instruction* insn = bb->instructions.back();
|
| - if (BPF_CLASS(insn->code) == BPF_JMP) {
|
| - // Basic block ended in a jump instruction. We can now compute the
|
| - // appropriate offsets.
|
| - if (BPF_OP(insn->code) == BPF_JA) {
|
| - // "Always" jumps use the 32bit "k" field for the offset, instead
|
| - // of the 8bit "jt" and "jf" fields.
|
| - int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
|
| - insn->k = jmp;
|
| - insn->jt = insn->jf = 0;
|
| - } else {
|
| - // The offset computations for conditional jumps are just the same
|
| - // as for "always" jumps.
|
| - int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
|
| - int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;
|
| -
|
| - // There is an added complication, because conditional relative jumps
|
| - // can only jump at most 255 instructions forward. If we have to jump
|
| - // further, insert an extra "always" jump.
|
| - Instructions::size_type jmp = bb->instructions.size();
|
| - if (jt > 255 || (jt == 255 && jf > 255)) {
|
| - Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
|
| - bb->instructions.push_back(ja);
|
| - ja->k = jt;
|
| - ja->jt = ja->jf = 0;
|
| -
|
| - // The newly inserted "always" jump, of course, requires us to adjust
|
| - // the jump targets in the original conditional jump.
|
| - jt = 0;
|
| - ++jf;
|
| - }
|
| - if (jf > 255) {
|
| - Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
|
| - bb->instructions.insert(bb->instructions.begin() + jmp, ja);
|
| - ja->k = jf;
|
| - ja->jt = ja->jf = 0;
|
| -
|
| - // Again, we have to adjust the jump targets in the original
|
| - // conditional jump.
|
| - ++jt;
|
| - jf = 0;
|
| - }
|
| -
|
| - // Now we can finally set the relative jump targets in the conditional
|
| - // jump instruction. Afterwards, we must no longer access the jt_ptr
|
| - // and jf_ptr fields.
|
| - insn->jt = jt;
|
| - insn->jf = jf;
|
| - }
|
| - } else if (BPF_CLASS(insn->code) != BPF_RET &&
|
| - targets_to_blocks.find(insn->next)->second != last_bb) {
|
| - SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
|
| - }
|
| -
|
| - // Proceed to next basic block.
|
| - offset += bb->instructions.size();
|
| - bb->offset = offset;
|
| - }
|
| - return;
|
| +size_t CodeGen::Offset(Node target) const {
|
| + CHECK_LT(target, program_.size()) << "Bogus offset target node";
|
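| + // Nodes count from the end of |program_|: the most recently
|
| + // appended instruction has offset 0, and older instructions have
|
| + // progressively larger offsets (e.g., after appending nodes 0, 1,
|
| + // and 2, Offset(2) == 0 and Offset(0) == 2).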
| + return (program_.size() - 1) - target;
|
| }
|
|
|
| -void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
|
| - Program* program) {
|
| - // Our basic blocks have been sorted and relative jump offsets have been
|
| - // computed. The last remaining step is for all the instructions in our
|
| - // basic blocks to be concatenated into a BPF program.
|
| - program->clear();
|
| - for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
|
| - bb_iter != basic_blocks.end();
|
| - ++bb_iter) {
|
| - const BasicBlock& bb = **bb_iter;
|
| - for (Instructions::const_iterator insn_iter = bb.instructions.begin();
|
| - insn_iter != bb.instructions.end();
|
| - ++insn_iter) {
|
| - const Instruction& insn = **insn_iter;
|
| - program->push_back(
|
| - (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
|
| - }
|
| - }
|
| - return;
|
| -}
|
| -
|
| -void CodeGen::Compile(Instruction* instructions, Program* program) {
|
| - if (compiled_) {
|
| - SANDBOX_DIE(
|
| - "Cannot call Compile() multiple times. Create a new code "
|
| - "generator instead");
|
| - }
|
| - compiled_ = true;
|
| -
|
| - BranchTargets branch_targets;
|
| - FindBranchTargets(*instructions, &branch_targets);
|
| - TargetsToBlocks all_blocks;
|
| - BasicBlock* first_block =
|
| - CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
|
| - MergeTails(&all_blocks);
|
| - BasicBlocks basic_blocks;
|
| - TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
|
| - ComputeRelativeJumps(&basic_blocks, all_blocks);
|
| - ConcatenateBasicBlocks(basic_blocks, program);
|
| - return;
|
| +// TODO(mdempsky): Move into a general base::Tuple helper library.
|
| +bool CodeGen::MemoKeyLess::operator()(const MemoKey& lhs,
|
| + const MemoKey& rhs) const {
|
| + if (lhs.a != rhs.a)
|
| + return lhs.a < rhs.a;
|
| + if (lhs.b != rhs.b)
|
| + return lhs.b < rhs.b;
|
| + if (lhs.c != rhs.c)
|
| + return lhs.c < rhs.c;
|
| + if (lhs.d != rhs.d)
|
| + return lhs.d < rhs.d;
|
| + return false;
|
| }
|
|
|
| } // namespace sandbox
|
|
|