OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "sandbox/linux/seccomp-bpf/codegen.h" | 5 #include "sandbox/linux/seccomp-bpf/codegen.h" |
6 | 6 |
7 #include <linux/filter.h> | 7 #include <linux/filter.h> |
8 | 8 |
9 #include <set> | 9 #include <limits> |
| 10 #include <utility> |
10 | 11 |
11 #include "base/logging.h" | 12 #include "base/logging.h" |
12 #include "sandbox/linux/seccomp-bpf/basicblock.h" | 13 |
13 #include "sandbox/linux/seccomp-bpf/die.h" | 14 // This CodeGen implementation strives for simplicity while still |
14 #include "sandbox/linux/seccomp-bpf/instruction.h" | 15 // generating acceptable BPF programs under typical usage patterns |
| 16 // (e.g., by PolicyCompiler). |
| 17 // |
| 18 // The key to its simplicity is that BPF programs only support forward |
| 19 // jumps/branches, which allows constraining the DAG construction API |
| 20 // to make instruction nodes immutable. Immutable nodes admit a |
| 21 // simple greedy approach of emitting new instructions as needed and |
| 22 // then reusing existing ones that have already been emitted. This |
| 23 // cleanly avoids any need to compute basic blocks or apply |
| 24 // topological sorting because the API effectively sorts instructions |
| 25 // for us (e.g., before MakeInstruction() can be called to emit a |
| 26 // branch instruction, it must have already been called for each |
| 27 // branch path). |
| 28 // |
| 29 // This greedy algorithm is not without (theoretical) weakness though: |
| 30 // |
| 31 // 1. In the general case, we don't eliminate dead code. If needed, |
| 32 // we could trace back through the program in Compile() and elide |
| 33 // any unneeded instructions, but in practice we only emit live |
| 34 // instructions anyway. |
| 35 // |
| 36 // 2. By not dividing instructions into basic blocks and sorting, we |
| 37 // lose an opportunity to move non-branch/non-return instructions |
| 38 // adjacent to their successor instructions, which means we might |
| 39 // need to emit additional jumps. But in practice, they'll |
| 40 // already be nearby as long as callers don't go out of their way |
| 41 // to interleave MakeInstruction() calls for unrelated code |
| 42 // sequences. |
15 | 43 |
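A minimal usage sketch of the API described above (not part of this change; the helper name, the chosen syscall, and the return values are illustrative assumptions, and it presumes the declarations in codegen.h plus <linux/filter.h>, <linux/seccomp.h>, <stddef.h>, and <sys/syscall.h>). Leaf nodes are emitted first, and a branch can only be requested once both of its targets exist, which is what keeps every jump forward-only:

  using sandbox::CodeGen;

  CodeGen::Program BuildToyFilter() {
    CodeGen gen;
    // Return instructions are leaves, so they are emitted first.
    CodeGen::Node kill = gen.MakeInstruction(
        BPF_RET + BPF_K, SECCOMP_RET_KILL, CodeGen::kNullNode, CodeGen::kNullNode);
    CodeGen::Node allow = gen.MakeInstruction(
        BPF_RET + BPF_K, SECCOMP_RET_ALLOW, CodeGen::kNullNode, CodeGen::kNullNode);
    // A conditional branch supplies both targets, which must already exist.
    CodeGen::Node check = gen.MakeInstruction(
        BPF_JMP + BPF_JEQ + BPF_K, __NR_getpid, allow, kill);
    // Non-branch instructions fall through to |jt|; |jf| stays kNullNode.
    CodeGen::Node head = gen.MakeInstruction(
        BPF_LD + BPF_W + BPF_ABS, offsetof(struct seccomp_data, nr),
        check, CodeGen::kNullNode);
    CodeGen::Program program;
    gen.Compile(head, &program);  // |program| now begins at |head|.
    return program;
  }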
16 namespace sandbox { | 44 namespace sandbox { |
17 | 45 |
18 // Unfortunately this needs to be defined out-of-line because inline | 46 // kBranchRange is the maximum value that can be stored in |
19 // initializing a static member to "nullptr" requires "constexpr", | 47 // sock_filter's 8-bit jt and jf fields. |
20 // which is currently banned by the Chromium style guide. | 48 const size_t kBranchRange = std::numeric_limits<uint8_t>::max(); |
21 const CodeGen::Node CodeGen::kNullNode = nullptr; | |
22 | 49 |
23 CodeGen::CodeGen() : compiled_(false) {} | 50 const CodeGen::Node CodeGen::kNullNode; |
| 51 |
| 52 CodeGen::CodeGen() : program_(), memos_() { |
| 53 } |
24 | 54 |
25 CodeGen::~CodeGen() { | 55 CodeGen::~CodeGen() { |
26 for (Instructions::iterator iter = instructions_.begin(); | 56 } |
27 iter != instructions_.end(); | 57 |
28 ++iter) { | 58 void CodeGen::Compile(CodeGen::Node head, Program* out) { |
29 delete *iter; | 59 DCHECK(out); |
30 } | 60 out->assign(program_.rbegin() + Offset(head), program_.rend()); |
31 for (BasicBlocks::iterator iter = basic_blocks_.begin(); | |
32 iter != basic_blocks_.end(); | |
33 ++iter) { | |
34 delete *iter; | |
35 } | |
36 } | 61 } |
37 | 62 |
38 CodeGen::Node CodeGen::MakeInstruction(uint16_t code, | 63 CodeGen::Node CodeGen::MakeInstruction(uint16_t code, |
39 uint32_t k, | 64 uint32_t k, |
40 Node jt, | 65 Node jt, |
41 Node jf) { | 66 Node jf) { |
42 Node insn; | 67 // To avoid generating redundant code sequences, we memoize the |
43 if (BPF_CLASS(code) == BPF_JMP) { | 68 // results from AppendInstruction(). |
44 CHECK_NE(kNullNode, jt); | 69 auto res = memos_.insert(std::make_pair(MemoKey(code, k, jt, jf), kNullNode)); |
45 if (BPF_OP(code) == BPF_JA) { | 70 CodeGen::Node* node = &res.first->second; |
46 CHECK_EQ(kNullNode, jf); | 71 if (res.second) { // Newly inserted memo entry. |
47 } else { | 72 *node = AppendInstruction(code, k, jt, jf); |
48 CHECK_NE(kNullNode, jf); | |
49 } | |
50 insn = new Instruction(code, k, jt, jf); | |
51 } else { | |
52 if (BPF_CLASS(code) == BPF_RET) { | |
53 CHECK_EQ(kNullNode, jt); | |
54 } else { | |
55 CHECK_NE(kNullNode, jt); | |
56 } | |
57 CHECK_EQ(kNullNode, jf); | |
58 insn = new Instruction(code, k, jt); | |
59 } | 73 } |
60 instructions_.push_back(insn); | 74 return *node; |
61 return insn; | |
62 } | 75 } |
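For illustration, the common-subexpression sharing this memo table provides (reusing the hypothetical |gen| from the sketch above): two structurally identical requests collapse into a single emitted instruction.

  // Both calls describe the same (code, k, jt, jf) tuple, so only one
  // BPF_RET instruction is appended and the same Node value is returned.
  CodeGen::Node r1 = gen.MakeInstruction(
      BPF_RET + BPF_K, SECCOMP_RET_ALLOW, CodeGen::kNullNode, CodeGen::kNullNode);
  CodeGen::Node r2 = gen.MakeInstruction(
      BPF_RET + BPF_K, SECCOMP_RET_ALLOW, CodeGen::kNullNode, CodeGen::kNullNode);
  // r1 == r2; any later branches to either share the same target.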
63 | 76 |
64 void CodeGen::FindBranchTargets(const Instruction& instructions, | 77 CodeGen::Node CodeGen::AppendInstruction(uint16_t code, |
65 BranchTargets* branch_targets) { | 78 uint32_t k, |
66 // Follow all possible paths through the "instructions" graph and compute | 79 Node jt, |
67 // a list of branch targets. This will later be needed to compute the | 80 Node jf) { |
68 // boundaries of basic blocks. | 81 if (BPF_CLASS(code) == BPF_JMP) { |
69 // We maintain a set of all instructions that we have previously seen. This | 82 CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed"; |
70 // set ultimately converges on all instructions in the program. | 83 |
71 std::set<const Instruction*> seen_instructions; | 84 // We need to check |jt| twice because it might get pushed |
72 Instructions stack; | 85 // out-of-range by appending a jump for |jf|. |
73 for (const Instruction* insn = &instructions; insn;) { | 86 jt = WithinRange(jt, kBranchRange); |
74 seen_instructions.insert(insn); | 87 jf = WithinRange(jf, kBranchRange); |
75 if (BPF_CLASS(insn->code) == BPF_JMP) { | 88 jt = WithinRange(jt, kBranchRange); |
76 // Found a jump. Increase count of incoming edges for each of the jump | 89 return Append(code, k, Offset(jt), Offset(jf)); |
77 // targets. | |
78 ++(*branch_targets)[insn->jt_ptr]; | |
79 if (BPF_OP(insn->code) != BPF_JA) { | |
80 ++(*branch_targets)[insn->jf_ptr]; | |
81 stack.push_back(const_cast<Instruction*>(insn)); | |
82 } | |
83 // Start a recursive descent for depth-first traversal. |
84 if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) { | |
85 // We haven't seen the "true" branch yet. Traverse it now. We have | |
86 // already remembered the "false" branch on the stack and will | |
87 // traverse it later. | |
88 insn = insn->jt_ptr; | |
89 continue; | |
90 } else { | |
91 // Now try traversing the "false" branch. | |
92 insn = NULL; | |
93 } | |
94 } else { | |
95 // This is a non-jump instruction, just continue to the next instruction | |
96 // (if any). It's OK if "insn" becomes NULL when reaching a return | |
97 // instruction. | |
98 if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) { | |
99 SANDBOX_DIE( | |
100 "Internal compiler error; return instruction must be at " | |
101 "the end of the BPF program"); | |
102 } | |
103 if (seen_instructions.find(insn->next) == seen_instructions.end()) { | |
104 insn = insn->next; | |
105 } else { | |
106 // We have seen this instruction before. That could happen if it is | |
107 // a branch target. No need to continue processing. | |
108 insn = NULL; | |
109 } | |
110 } | |
111 while (!insn && !stack.empty()) { | |
112 // We are done processing all the way to a leaf node, backtrack up the | |
113 // stack to any branches that we haven't processed yet. By definition, | |
114 // this has to be a "false" branch, as we always process the "true" | |
115 // branches right away. | |
116 insn = stack.back(); | |
117 stack.pop_back(); | |
118 if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) { | |
119 // We haven't seen the "false" branch yet. So, that's where we'll | |
120 // go now. | |
121 insn = insn->jf_ptr; | |
122 } else { | |
123 // We have seen both the "true" and the "false" branch, continue | |
124 // up the stack. | |
125 if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) { | |
126 SANDBOX_DIE( | |
127 "Internal compiler error; cannot find all " | |
128 "branch targets"); | |
129 } | |
130 insn = NULL; | |
131 } | |
132 } | |
133 } | 90 } |
134 return; | 91 |
| 92 CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf"; |
| 93 if (BPF_CLASS(code) == BPF_RET) { |
| 94 CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt"; |
| 95 } else { |
| 96 // For non-branch/non-return instructions, execution always |
| 97 // proceeds to the next instruction; so we need to arrange for |
| 98 // that to be |jt|. |
| 99 jt = WithinRange(jt, 0); |
| 100 } |
| 101 return Append(code, k, 0, 0); |
135 } | 102 } |
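To make the double range check above concrete, a worked example with assumed offsets (an Offset() value is the distance back to the target, measured from the most recently emitted instruction, and kBranchRange is 255): suppose a conditional jump is requested with Offset(jt) == 255 and Offset(jf) == 300. The first WithinRange(jt, kBranchRange) leaves jt alone, but WithinRange(jf, kBranchRange) must append a BPF_JA, which pushes jt's offset to 256; the second WithinRange(jt, kBranchRange) then appends another BPF_JA so that both fields fit in eight bits.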
136 | 103 |
137 BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) { | 104 CodeGen::Node CodeGen::WithinRange(Node target, size_t range) { |
138 // Iterate over all the instructions between "head" and "tail" and | 105 if (Offset(target) > range) { |
139 // insert them into a new basic block. | 106 // TODO(mdempsky): If |range > 0|, we might be able to reuse an |
140 BasicBlock* bb = new BasicBlock; | 107 // existing instruction within that range. |
141 for (;; head = head->next) { | 108 |
142 bb->instructions.push_back(head); | 109 // TODO(mdempsky): If |target| is a branch or return, it might be |
143 if (head == tail) { | 110 // possible to duplicate that instruction rather than jump to it. |
144 break; | 111 |
145 } | 112 // Fall back to emitting a jump instruction. |
146 if (BPF_CLASS(head->code) == BPF_JMP) { | 113 target = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0); |
147 SANDBOX_DIE("Found a jump inside of a basic block"); | |
148 } | |
149 } | 114 } |
150 basic_blocks_.push_back(bb); | 115 |
151 return bb; | 116 CHECK_LE(Offset(target), range) << "ICE: Failed to bring target within range"; |
| 117 return target; |
152 } | 118 } |
153 | 119 |
154 void CodeGen::AddBasicBlock(Instruction* head, | 120 CodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) { |
155 Instruction* tail, | 121 if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) { |
156 const BranchTargets& branch_targets, | 122 CHECK_LE(jt, kBranchRange); |
157 TargetsToBlocks* basic_blocks, | 123 CHECK_LE(jf, kBranchRange); |
158 BasicBlock** firstBlock) { | 124 } else { |
159 // Add a new basic block to "basic_blocks". Also set "firstBlock", if it | 125 CHECK_EQ(0U, jt); |
160 // has not been set before. | 126 CHECK_EQ(0U, jf); |
161 BranchTargets::const_iterator iter = branch_targets.find(head); | |
162 if ((iter == branch_targets.end()) != !*firstBlock || | |
163 !*firstBlock != basic_blocks->empty()) { | |
164 SANDBOX_DIE( | |
165 "Only the very first basic block should have no " | |
166 "incoming jumps"); | |
167 } | 127 } |
168 BasicBlock* bb = MakeBasicBlock(head, tail); | 128 |
169 if (!*firstBlock) { | 129 CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS)); |
170 *firstBlock = bb; | 130 program_.push_back(sock_filter{code, jt, jf, k}); |
171 } | 131 return program_.size() - 1; |
172 (*basic_blocks)[head] = bb; | |
173 return; | |
174 } | 132 } |
175 | 133 |
176 BasicBlock* CodeGen::CutGraphIntoBasicBlocks( | 134 size_t CodeGen::Offset(Node target) const { |
177 Instruction* instructions, | 135 CHECK_LT(target, program_.size()) << "Bogus offset target node"; |
178 const BranchTargets& branch_targets, | 136 return (program_.size() - 1) - target; |
179 TargetsToBlocks* basic_blocks) { | |
180 // Textbook implementation of a basic block generator. All basic blocks | |
181 // start with a branch target and end with either a return statement or | |
182 // a jump (or are followed by an instruction that forms the beginning of a | |
183 // new block). Both conditional and "always" jumps are supported. | |
184 BasicBlock* first_block = NULL; | |
185 std::set<const Instruction*> seen_instructions; | |
186 Instructions stack; | |
187 Instruction* tail = NULL; | |
188 Instruction* head = instructions; | |
189 for (Instruction* insn = head; insn;) { | |
190 if (seen_instructions.find(insn) != seen_instructions.end()) { | |
191 // We somehow went in a circle. This should never be possible. Not even | |
192 // cyclic graphs are supposed to confuse us this much. | |
193 SANDBOX_DIE("Internal compiler error; cannot compute basic blocks"); | |
194 } | |
195 seen_instructions.insert(insn); | |
196 if (tail && branch_targets.find(insn) != branch_targets.end()) { | |
197 // We reached a branch target. Start a new basic block (this means, | |
198 // flushing the previous basic block first). | |
199 AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block); | |
200 head = insn; | |
201 } | |
202 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
203 // We reached a jump instruction, this completes our current basic | |
204 // block. Flush it and continue by traversing both the true and the | |
205 // false branch of the jump. We need to maintain a stack to do so. | |
206 AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block); | |
207 if (BPF_OP(insn->code) != BPF_JA) { | |
208 stack.push_back(insn->jf_ptr); | |
209 } | |
210 insn = insn->jt_ptr; | |
211 | |
212 // If we are jumping to an instruction that we have previously | |
213 // processed, we are done with this branch. Continue by backtracking | |
214 // up the stack. | |
215 while (seen_instructions.find(insn) != seen_instructions.end()) { | |
216 backtracking: | |
217 if (stack.empty()) { | |
218 // We successfully traversed all reachable instructions. | |
219 return first_block; | |
220 } else { | |
221 // Going up the stack. | |
222 insn = stack.back(); | |
223 stack.pop_back(); | |
224 } | |
225 } | |
226 // Starting a new basic block. | |
227 tail = NULL; | |
228 head = insn; | |
229 } else { | |
230 // We found a non-jumping instruction, append it to current basic | |
231 // block. | |
232 tail = insn; | |
233 insn = insn->next; | |
234 if (!insn) { | |
235 // We reached a return statement, flush the current basic block and | |
236 // backtrack up the stack. | |
237 AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block); | |
238 goto backtracking; | |
239 } | |
240 } | |
241 } | |
242 return first_block; | |
243 } | 137 } |
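Because Append() only ever pushes to the back of program_, the buffer holds the program back-to-front: the first node emitted (necessarily a return, since every other instruction needs an existing |jt|) ends up as the last instruction, and Offset() gives the forward distance a jump needs once the order is reversed. Compile() therefore just copies from program_.rbegin() + Offset(head) through program_.rend(). In the hypothetical four-node sketch near the top of this file, program_ ends up as [kill, allow, jeq, load] at indices 0 through 3; reversing from head (index 3, so Offset(head) == 0) yields load, jeq, allow, kill, and the branch's jt/jf offsets of 0 and 1 select allow and kill respectively.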
244 | 138 |
245 // We define a comparator that inspects the sequence of instructions in our | 139 // TODO(mdempsky): Move into a general base::Tuple helper library. |
246 // basic block and any blocks referenced by this block. This function can be | 140 bool CodeGen::MemoKeyLess::operator()(const MemoKey& lhs, |
247 // used in a "less" comparator for the purpose of storing pointers to basic | 141 const MemoKey& rhs) const { |
248 // blocks in STL containers; this gives an easy option to use STL to find | 142 if (lhs.a != rhs.a) |
249 // shared tail sequences of basic blocks. | 143 return lhs.a < rhs.a; |
250 static int PointerCompare(const BasicBlock* block1, | 144 if (lhs.b != rhs.b) |
251 const BasicBlock* block2, | 145 return lhs.b < rhs.b; |
252 const TargetsToBlocks& blocks) { | 146 if (lhs.c != rhs.c) |
253 // Return <0, 0, or >0 depending on the ordering of "block1" and "block2". | 147 return lhs.c < rhs.c; |
254 // If we are looking at the exact same block, this is trivial and we don't | 148 if (lhs.d != rhs.d) |
255 // need to do a full comparison. | 149 return lhs.d < rhs.d; |
256 if (block1 == block2) { | 150 return false; |
257 return 0; | |
258 } | |
259 | |
260 // We compare the sequence of instructions in both basic blocks. | |
261 const Instructions& insns1 = block1->instructions; | |
262 const Instructions& insns2 = block2->instructions; | |
263 // Basic blocks should never be empty. | |
264 CHECK(!insns1.empty()); | |
265 CHECK(!insns2.empty()); | |
266 | |
267 Instructions::const_iterator iter1 = insns1.begin(); | |
268 Instructions::const_iterator iter2 = insns2.begin(); | |
269 for (;; ++iter1, ++iter2) { | |
270 // If we have reached the end of the sequence of instructions in one or | |
271 // both basic blocks, we know the relative ordering between the two blocks | |
272 // and can return. | |
273 if (iter1 == insns1.end() || iter2 == insns2.end()) { | |
274 if (iter1 != insns1.end()) { | |
275 return 1; | |
276 } | |
277 if (iter2 != insns2.end()) { | |
278 return -1; | |
279 } | |
280 | |
281 // If the two blocks are the same length (and have elementwise-equal code | |
282 // and k fields) and their last instructions are neither a JMP nor a RET | |
283 // (which is the only way we can reach this point), then we must compare | |
284 // their successors. | |
285 Instruction* const insns1_last = insns1.back(); | |
286 Instruction* const insns2_last = insns2.back(); | |
287 CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP && | |
288 BPF_CLASS(insns1_last->code) != BPF_RET); | |
289 | |
290 // Non jumping instructions will always have a valid next instruction. | |
291 CHECK(insns1_last->next); | |
292 CHECK(insns2_last->next); | |
293 return PointerCompare(blocks.find(insns1_last->next)->second, | |
294 blocks.find(insns2_last->next)->second, | |
295 blocks); | |
296 } | |
297 | |
298 // Compare the individual fields for both instructions. | |
299 const Instruction& insn1 = **iter1; | |
300 const Instruction& insn2 = **iter2; | |
301 if (insn1.code != insn2.code) { | |
302 return insn1.code - insn2.code; | |
303 } | |
304 if (insn1.k != insn2.k) { | |
305 return insn1.k - insn2.k; | |
306 } | |
307 | |
308 // Sanity check: If we're looking at a JMP or RET instruction, by definition | |
309 // it should be the last instruction of the basic block. | |
310 if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) { | |
311 CHECK_EQ(insns1.back(), &insn1); | |
312 CHECK_EQ(insns2.back(), &insn2); | |
313 } | |
314 | |
315 // RET instructions terminate execution, and only JMP instructions use the | |
316 // jt_ptr and jf_ptr fields. Anything else can continue to the next | |
317 // instruction in the basic block. | |
318 if (BPF_CLASS(insn1.code) == BPF_RET) { | |
319 return 0; | |
320 } else if (BPF_CLASS(insn1.code) != BPF_JMP) { | |
321 continue; | |
322 } | |
323 | |
324 // Recursively compare the "true" and "false" branches. | |
325 // A well-formed BPF program can't have any cycles, so we know | |
326 // that our recursive algorithm will ultimately terminate. | |
327 // In the unlikely event that the programmer made a mistake and | |
328 // went out of the way to give us a cyclic program, we will crash | |
329 // with a stack overflow. We are OK with that. | |
330 if (BPF_OP(insn1.code) != BPF_JA) { | |
331 int c = PointerCompare(blocks.find(insn1.jf_ptr)->second, | |
332 blocks.find(insn2.jf_ptr)->second, | |
333 blocks); | |
334 if (c != 0) { | |
335 return c; | |
336 } | |
337 } | |
338 return PointerCompare(blocks.find(insn1.jt_ptr)->second, | |
339 blocks.find(insn2.jt_ptr)->second, | |
340 blocks); | |
341 } | |
342 } | |
343 | |
344 void CodeGen::MergeTails(TargetsToBlocks* blocks) { | |
345 // We enter all of our basic blocks into a set using the BasicBlock::Less() | |
346 // comparator. This naturally results in blocks with identical tails of | |
347 // instructions to map to the same entry in the set. Whenever we discover | |
348 // that a particular chain of instructions is already in the set, we merge | |
349 // the basic blocks and update the pointer in the "blocks" map. | |
350 // Returns the number of unique basic blocks. | |
351 // N.B. We don't merge instructions on a granularity that is finer than | |
352 // a basic block. In practice, this is sufficiently rare that we don't | |
353 // incur a big cost. | |
354 // Similarly, we currently don't merge anything other than tails. In | |
355 // the future, we might decide to revisit this decision and attempt to | |
356 // merge arbitrary sub-sequences of instructions. | |
357 BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare); | |
358 typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set; | |
359 Set seen_basic_blocks(less); | |
360 for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end(); | |
361 ++iter) { | |
362 BasicBlock* bb = iter->second; | |
363 Set::const_iterator entry = seen_basic_blocks.find(bb); | |
364 if (entry == seen_basic_blocks.end()) { | |
365 // This is the first time we see this particular sequence of | |
366 // instructions. Enter the basic block into the set of known | |
367 // basic blocks. | |
368 seen_basic_blocks.insert(bb); | |
369 } else { | |
370 // We have previously seen another basic block that defines the same | |
371 // sequence of instructions. Merge the two blocks and update the | |
372 // pointer in the "blocks" map. | |
373 iter->second = *entry; | |
374 } | |
375 } | |
376 } | |
377 | |
378 void CodeGen::ComputeIncomingBranches(BasicBlock* block, | |
379 const TargetsToBlocks& targets_to_blocks, | |
380 IncomingBranches* incoming_branches) { | |
381 // We increment the number of incoming branches each time we encounter a | |
382 // basic block. But we only traverse recursively the very first time we | |
383 // encounter a new block. This is necessary to make topological sorting | |
384 // work correctly. | |
385 if (++(*incoming_branches)[block] == 1) { | |
386 Instruction* last_insn = block->instructions.back(); | |
387 if (BPF_CLASS(last_insn->code) == BPF_JMP) { | |
388 ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second, | |
389 targets_to_blocks, | |
390 incoming_branches); | |
391 if (BPF_OP(last_insn->code) != BPF_JA) { | |
392 ComputeIncomingBranches( | |
393 targets_to_blocks.find(last_insn->jf_ptr)->second, | |
394 targets_to_blocks, | |
395 incoming_branches); | |
396 } | |
397 } else if (BPF_CLASS(last_insn->code) != BPF_RET) { | |
398 ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second, | |
399 targets_to_blocks, | |
400 incoming_branches); | |
401 } | |
402 } | |
403 } | |
404 | |
405 void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block, | |
406 const TargetsToBlocks& blocks, | |
407 BasicBlocks* basic_blocks) { | |
408 // Textbook implementation of a toposort. We keep looking for basic blocks | |
409 // that don't have any incoming branches (initially, this is just the | |
410 // "first_block") and add them to the topologically sorted list of | |
411 // "basic_blocks". As we do so, we remove outgoing branches. This potentially | |
412 // ends up making our descendants eligible for the sorted list. The | |
413 // sorting algorithm terminates when there are no more basic blocks that have | |
414 // no incoming branches. If we didn't move all blocks from the set of | |
415 // "unordered_blocks" to the sorted list of "basic_blocks", there must have | |
416 // been a cyclic dependency. This should never happen in a BPF program, as | |
417 // well-formed BPF programs only ever have forward branches. | |
418 IncomingBranches unordered_blocks; | |
419 ComputeIncomingBranches(first_block, blocks, &unordered_blocks); | |
420 | |
421 std::set<BasicBlock*> heads; | |
422 for (;;) { | |
423 // Move block from "unordered_blocks" to "basic_blocks". | |
424 basic_blocks->push_back(first_block); | |
425 | |
426 // Inspect last instruction in the basic block. This is typically either a | |
427 // jump or a return statement. But it could also be a "normal" instruction | |
428 // that is followed by a jump target. | |
429 Instruction* last_insn = first_block->instructions.back(); | |
430 if (BPF_CLASS(last_insn->code) == BPF_JMP) { | |
431 // Remove outgoing branches. This might end up moving our descendants | |
432 // into set of "head" nodes that no longer have any incoming branches. | |
433 TargetsToBlocks::const_iterator iter; | |
434 if (BPF_OP(last_insn->code) != BPF_JA) { | |
435 iter = blocks.find(last_insn->jf_ptr); | |
436 if (!--unordered_blocks[iter->second]) { | |
437 heads.insert(iter->second); | |
438 } | |
439 } | |
440 iter = blocks.find(last_insn->jt_ptr); | |
441 if (!--unordered_blocks[iter->second]) { | |
442 first_block = iter->second; | |
443 continue; | |
444 } | |
445 } else if (BPF_CLASS(last_insn->code) != BPF_RET) { | |
446 // We encountered an instruction that doesn't change code flow. Try to | |
447 // pick the next "first_block" from "last_insn->next", if possible. | |
448 TargetsToBlocks::const_iterator iter; | |
449 iter = blocks.find(last_insn->next); | |
450 if (!--unordered_blocks[iter->second]) { | |
451 first_block = iter->second; | |
452 continue; | |
453 } else { | |
454 // Our basic block is supposed to be followed by "last_insn->next", | |
455 // but dependencies prevent this from happening. Insert a BPF_JA | |
456 // instruction to correct the code flow. | |
457 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next); | |
458 first_block->instructions.push_back(ja); | |
459 last_insn->next = ja; | |
460 } | |
461 } | |
462 if (heads.empty()) { | |
463 if (unordered_blocks.size() != basic_blocks->size()) { | |
464 SANDBOX_DIE("Internal compiler error; cyclic graph detected"); | |
465 } | |
466 return; | |
467 } | |
468 // Proceed by picking an arbitrary node from the set of basic blocks that | |
469 // do not have any incoming branches. | |
470 first_block = *heads.begin(); | |
471 heads.erase(heads.begin()); | |
472 } | |
473 } | |
474 | |
475 void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks, | |
476 const TargetsToBlocks& targets_to_blocks) { | |
477 // While we previously used pointers in jt_ptr and jf_ptr to link jump | |
478 // instructions to their targets, we now convert these jumps to relative | |
479 // jumps that are suitable for loading the BPF program into the kernel. | |
480 int offset = 0; | |
481 | |
482 // Since we just completed a toposort, all jump targets are guaranteed to | |
483 // go forward. This means, iterating over the basic blocks in reverse makes | |
484 // it trivial to compute the correct offsets. | |
485 BasicBlock* bb = NULL; | |
486 BasicBlock* last_bb = NULL; | |
487 for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin(); | |
488 iter != basic_blocks->rend(); | |
489 ++iter) { | |
490 last_bb = bb; | |
491 bb = *iter; | |
492 Instruction* insn = bb->instructions.back(); | |
493 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
494 // Basic block ended in a jump instruction. We can now compute the | |
495 // appropriate offsets. | |
496 if (BPF_OP(insn->code) == BPF_JA) { | |
497 // "Always" jumps use the 32bit "k" field for the offset, instead | |
498 // of the 8bit "jt" and "jf" fields. | |
499 int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset; | |
500 insn->k = jmp; | |
501 insn->jt = insn->jf = 0; | |
502 } else { | |
503 // The offset computations for conditional jumps are just the same | |
504 // as for "always" jumps. | |
505 int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset; | |
506 int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset; | |
507 | |
508 // There is an added complication, because conditional relative jumps | |
509 // can only jump at most 255 instructions forward. If we have to jump | |
510 // further, insert an extra "always" jump. | |
511 Instructions::size_type jmp = bb->instructions.size(); | |
512 if (jt > 255 || (jt == 255 && jf > 255)) { | |
513 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr); | |
514 bb->instructions.push_back(ja); | |
515 ja->k = jt; | |
516 ja->jt = ja->jf = 0; | |
517 | |
518 // The newly inserted "always" jump, of course, requires us to adjust | |
519 // the jump targets in the original conditional jump. | |
520 jt = 0; | |
521 ++jf; | |
522 } | |
523 if (jf > 255) { | |
524 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr); | |
525 bb->instructions.insert(bb->instructions.begin() + jmp, ja); | |
526 ja->k = jf; | |
527 ja->jt = ja->jf = 0; | |
528 | |
529 // Again, we have to adjust the jump targets in the original | |
530 // conditional jump. | |
531 ++jt; | |
532 jf = 0; | |
533 } | |
534 | |
535 // Now we can finally set the relative jump targets in the conditional | |
536 // jump instruction. Afterwards, we must no longer access the jt_ptr | |
537 // and jf_ptr fields. | |
538 insn->jt = jt; | |
539 insn->jf = jf; | |
540 } | |
541 } else if (BPF_CLASS(insn->code) != BPF_RET && | |
542 targets_to_blocks.find(insn->next)->second != last_bb) { | |
543 SANDBOX_DIE("Internal compiler error; invalid basic block encountered"); | |
544 } | |
545 | |
546 // Proceed to next basic block. | |
547 offset += bb->instructions.size(); | |
548 bb->offset = offset; | |
549 } | |
550 return; | |
551 } | |
552 | |
553 void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks, | |
554 Program* program) { | |
555 // Our basic blocks have been sorted and relative jump offsets have been | |
556 // computed. The last remaining step is for all the instructions in our | |
557 // basic blocks to be concatenated into a BPF program. | |
558 program->clear(); | |
559 for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin(); | |
560 bb_iter != basic_blocks.end(); | |
561 ++bb_iter) { | |
562 const BasicBlock& bb = **bb_iter; | |
563 for (Instructions::const_iterator insn_iter = bb.instructions.begin(); | |
564 insn_iter != bb.instructions.end(); | |
565 ++insn_iter) { | |
566 const Instruction& insn = **insn_iter; | |
567 program->push_back( | |
568 (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k}); | |
569 } | |
570 } | |
571 return; | |
572 } | |
573 | |
574 void CodeGen::Compile(Instruction* instructions, Program* program) { | |
575 if (compiled_) { | |
576 SANDBOX_DIE( | |
577 "Cannot call Compile() multiple times. Create a new code " | |
578 "generator instead"); | |
579 } | |
580 compiled_ = true; | |
581 | |
582 BranchTargets branch_targets; | |
583 FindBranchTargets(*instructions, &branch_targets); | |
584 TargetsToBlocks all_blocks; | |
585 BasicBlock* first_block = | |
586 CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks); | |
587 MergeTails(&all_blocks); | |
588 BasicBlocks basic_blocks; | |
589 TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks); | |
590 ComputeRelativeJumps(&basic_blocks, all_blocks); | |
591 ConcatenateBasicBlocks(basic_blocks, program); | |
592 return; | |
593 } | 151 } |
594 | 152 |
595 } // namespace sandbox | 153 } // namespace sandbox |