OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "sandbox/linux/seccomp-bpf/codegen.h" | 5 #include "sandbox/linux/seccomp-bpf/codegen.h" |
6 | 6 |
7 #include <linux/filter.h> | 7 #include <linux/filter.h> |
8 | 8 |
9 #include <set> | 9 #include <utility> |
10 | 10 |
11 #include "base/logging.h" | 11 #include "base/logging.h" |
12 #include "sandbox/linux/seccomp-bpf/basicblock.h" | |
13 #include "sandbox/linux/seccomp-bpf/die.h" | |
14 #include "sandbox/linux/seccomp-bpf/instruction.h" | |
15 | 12 |
16 namespace sandbox { | 13 namespace sandbox { |
17 | 14 |
18 // Unfortunately this needs to be defined out-of-line because inline | 15 const CodeGen::Addr CodeGen::kNullAddr; |
19 // initializing a static member to "nullptr" requires "constexpr", | |
20 // which is currently banned by the Chromium style guide. | |
21 const CodeGen::Addr CodeGen::kNullAddr = nullptr; | |
22 | 16 |
23 CodeGen::CodeGen() : compiled_(false) {} | 17 CodeGen::CodeGen() : program_(), cache_() { |
18 } | |
19 CodeGen::~CodeGen() { | |
20 } | |
24 | 21 |
25 CodeGen::~CodeGen() { | 22 void CodeGen::Compile(CodeGen::Addr head, Program* out) { |
26 for (Instructions::iterator iter = instructions_.begin(); | 23 out->assign(program_.rbegin() + Offset(head), program_.rend()); |
rickyz (no longer on Chrome), 2014/11/12 23:43:50:
Will head always end up being the most recently added instruction? …
mdempsky, 2014/11/18 01:07:30:
In practice it will be, but I feel a bit safer making …
| |
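Context for this thread (a sketch, not part of the patch): in the new design, program_ holds instructions in reverse execution order, an Addr is the index at which an instruction was pushed, and Offset(addr) is its distance from the newest entry. Compile()'s reverse copy therefore restores forward order starting at head and simply skips anything pushed after head, so head need not be the last instruction added. A minimal standalone model, with plain ints standing in for sock_filter:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Toy model (hypothetical values): instructions are pushed in reverse
// execution order, an "Addr" is the push index, and
// Offset(addr) == (size - 1) - addr measures distance from the newest entry.
int main() {
  std::vector<int> program = {30, 20, 10};  // 30 pushed first, executes last
  std::size_t head = 1;                     // Addr of instruction "20"
  std::size_t offset = (program.size() - 1) - head;  // == 1
  // The reverse copy in Compile(): starts at head, restores execution order,
  // and skips entries pushed after head (here the unreachable "10").
  std::vector<int> out(program.rbegin() + offset, program.rend());
  assert(out.size() == 2 && out[0] == 20 && out[1] == 30);
}
```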
27 iter != instructions_.end(); | |
28 ++iter) { | |
29 delete *iter; | |
30 } | |
31 for (BasicBlocks::iterator iter = basic_blocks_.begin(); | |
32 iter != basic_blocks_.end(); | |
33 ++iter) { | |
34 delete *iter; | |
35 } | |
36 } | 24 } |
37 | 25 |
38 CodeGen::Addr CodeGen::MakeInstruction(uint16_t code, | 26 CodeGen::Addr CodeGen::MakeInstruction(uint16_t code, |
39 uint32_t k, | 27 uint32_t k, |
40 Addr jt, | 28 Addr jt, |
41 Addr jf) { | 29 Addr jf) { |
42 Instruction* insn; | |
43 if (BPF_CLASS(code) == BPF_JMP) { | 30 if (BPF_CLASS(code) == BPF_JMP) { |
44 CHECK_NE(kNullAddr, jt); | 31 CHECK_NE(BPF_JA, BPF_OP(code)); |
45 if (BPF_OP(code) == BPF_JA) { | 32 CheckValid(jt); |
46 CHECK_EQ(kNullAddr, jf); | 33 CheckValid(jf); |
47 } else { | |
48 CHECK_NE(kNullAddr, jf); | |
49 } | |
50 insn = new Instruction(code, k, jt, jf); | |
51 } else { | 34 } else { |
52 if (BPF_CLASS(code) == BPF_RET) { | 35 if (BPF_CLASS(code) == BPF_RET) { |
53 CHECK_EQ(kNullAddr, jt); | 36 CHECK_EQ(kNullAddr, jt); |
54 } else { | 37 } else { |
55 CHECK_NE(kNullAddr, jt); | 38 CheckValid(jt); |
56 } | 39 } |
57 CHECK_EQ(kNullAddr, jf); | 40 CHECK_EQ(kNullAddr, jf); |
58 insn = new Instruction(code, k, jt); | |
59 } | 41 } |
60 instructions_.push_back(insn); | 42 |
61 return insn; | 43 // Look up the cache entry, inserting a new one if necessary. |
44 auto it = cache_.insert(std::make_pair(CacheKey(code, k, jt, jf), kNullAddr)); | |
45 CodeGen::Addr& res = it.first->second; | |
46 const bool miss = it.second; | |
47 | |
48 if (miss) { | |
49 if (BPF_CLASS(code) == BPF_JMP) { | |
50 // Ensure branch targets are within range. | |
51 if (Offset(jt) >= 255) { | |
rickyz (no longer on Chrome), 2014/11/12 23:43:50:
Should this be 256?
mdempsky, 2014/11/18 01:07:30:
Sort of. It was 255 so I wouldn't need to worry about …
| |
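One way to read the 255-vs-256 asymmetry (inferred from the code above, not the reviewers' words): sock_filter's jt and jf fields are 8 bits wide, so a conditional branch can skip at most 255 instructions, while an unconditional BPF_JA keeps its offset in the 32-bit k field. A trampoline emitted for one branch pushes the other branch's target out by one slot; jt is checked first against the stricter 255 so that a later jf trampoline cannot push it past the limit, while jf, checked afterwards, can use the full 256. A toy model of that arithmetic:

```cpp
#include <cassert>
#include <cstddef>

// Toy model of the offset arithmetic in the new MakeInstruction (no real
// BPF here): jt is trampolined at >= 255 because a later jf trampoline
// would push an offset of exactly 255 out to 256, past the 8-bit limit.
int main() {
  std::size_t jt_off = 255;  // just outside the safe range for jt
  std::size_t jf_off = 300;  // far out of range for jf
  if (jt_off >= 255) {
    jt_off = 0;  // conditional jump now targets a fresh BPF_JA trampoline
    ++jf_off;    // the appended trampoline moves every other target out by 1
  }
  if (jf_off >= 256) {
    jf_off = 0;  // second trampoline, this one for the false branch
    ++jt_off;    // which bumps the jt trampoline to offset 1; still in range
  }
  assert(jt_off <= 255 && jf_off <= 255);  // both now fit in 8 bits
}
```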
52 jt = Jump(jt); | |
53 } | |
54 if (Offset(jf) >= 256) { | |
55 jf = Jump(jf); | |
56 } | |
57 res = Append(code, k, Offset(jt), Offset(jf)); | |
58 } else { | |
59 if (BPF_CLASS(code) != BPF_RET && Offset(jt) != 0) { | |
60 jt = Jump(jt); | |
61 CHECK_EQ(0U, Offset(jt)); | |
62 } | |
63 res = Append(code, k, 0, 0); | |
64 } | |
65 } | |
66 return res; | |
62 } | 67 } |
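A note on the insert idiom in the new MakeInstruction: std::map::insert returns a pair of (iterator to the entry, bool that is true only on a miss), so a single call both probes the cache and reserves a slot for the value about to be computed. A self-contained illustration with hypothetical key/value types:

```cpp
#include <cassert>
#include <map>
#include <utility>

// Sketch of the map::insert idiom used above: insert() returns a pair of
// (iterator to the entry, bool that is true only if a new entry was created),
// so one lookup both finds an existing value and reserves a slot on a miss.
int main() {
  std::map<int, int> cache;
  auto it = cache.insert(std::make_pair(42, -1));
  assert(it.second);      // miss: a placeholder entry was created
  it.first->second = 7;   // fill in the computed value
  auto again = cache.insert(std::make_pair(42, -1));
  assert(!again.second);  // hit: the existing entry is returned untouched
  assert(again.first->second == 7);
}
```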
63 | 68 |
64 void CodeGen::FindBranchTargets(const Instruction& instructions, | 69 size_t CodeGen::Offset(Addr target) const { |
65 BranchTargets* branch_targets) { | 70 CheckValid(target); |
66 // Follow all possible paths through the "instructions" graph and compute | 71 return (program_.size() - 1) - target; |
67 // a list of branch targets. This will later be needed to compute the | |
68 // boundaries of basic blocks. | |
69 // We maintain a set of all instructions that we have previously seen. This | |
70 // set ultimately converges on all instructions in the program. | |
71 std::set<const Instruction*> seen_instructions; | |
72 Instructions stack; | |
73 for (const Instruction* insn = &instructions; insn;) { | |
74 seen_instructions.insert(insn); | |
75 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
76 // Found a jump. Increase count of incoming edges for each of the jump | |
77 // targets. | |
78 ++(*branch_targets)[insn->jt_ptr]; | |
79 if (BPF_OP(insn->code) != BPF_JA) { | |
80 ++(*branch_targets)[insn->jf_ptr]; | |
81 stack.push_back(const_cast<Instruction*>(insn)); | |
82 } | |
83 // Start a recursive descent for depth-first traversal. | |
84 if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) { | |
85 // We haven't seen the "true" branch yet. Traverse it now. We have | |
86 // already remembered the "false" branch on the stack and will | |
87 // traverse it later. | |
88 insn = insn->jt_ptr; | |
89 continue; | |
90 } else { | |
91 // Now try traversing the "false" branch. | |
92 insn = NULL; | |
93 } | |
94 } else { | |
95 // This is a non-jump instruction, just continue to the next instruction | |
96 // (if any). It's OK if "insn" becomes NULL when reaching a return | |
97 // instruction. | |
98 if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) { | |
99 SANDBOX_DIE( | |
100 "Internal compiler error; return instruction must be at " | |
101 "the end of the BPF program"); | |
102 } | |
103 if (seen_instructions.find(insn->next) == seen_instructions.end()) { | |
104 insn = insn->next; | |
105 } else { | |
106 // We have seen this instruction before. That could happen if it is | |
107 // a branch target. No need to continue processing. | |
108 insn = NULL; | |
109 } | |
110 } | |
111 while (!insn && !stack.empty()) { | |
112 // We are done processing all the way to a leaf node, backtrack up the | |
113 // stack to any branches that we haven't processed yet. By definition, | |
114 // this has to be a "false" branch, as we always process the "true" | |
115 // branches right away. | |
116 insn = stack.back(); | |
117 stack.pop_back(); | |
118 if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) { | |
119 // We haven't seen the "false" branch yet. So, that's where we'll | |
120 // go now. | |
121 insn = insn->jf_ptr; | |
122 } else { | |
123 // We have seen both the "true" and the "false" branch, continue | |
124 // up the stack. | |
125 if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) { | |
126 SANDBOX_DIE( | |
127 "Internal compiler error; cannot find all " | |
128 "branch targets"); | |
129 } | |
130 insn = NULL; | |
131 } | |
132 } | |
133 } | |
134 return; | |
135 } | 72 } |
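The deleted traversal above is a depth-first walk that counts how many edges enter each instruction; those counts later mark basic-block boundaries. A stripped-down equivalent over an invented adjacency list, for readers skimming the removal:

```cpp
#include <cassert>
#include <map>
#include <vector>

// DFS with an explicit stack, counting incoming edges per node: the same
// bookkeeping the old FindBranchTargets kept in branch_targets.
std::map<int, int> CountIncoming(const std::map<int, std::vector<int>>& succ,
                                 int entry) {
  std::map<int, int> incoming;
  std::map<int, bool> seen;
  std::vector<int> stack = {entry};
  while (!stack.empty()) {
    int n = stack.back();
    stack.pop_back();
    if (seen[n])
      continue;  // already expanded; its outgoing edges were counted once
    seen[n] = true;
    for (int s : succ.at(n)) {
      ++incoming[s];  // every edge into s counts, even edges to seen nodes
      stack.push_back(s);
    }
  }
  return incoming;
}

int main() {
  // Diamond graph: 0 -> {1, 2}; 1 -> {3}; 2 -> {3}; 3 -> {}.
  std::map<int, std::vector<int>> g = {{0, {1, 2}}, {1, {3}}, {2, {3}}, {3, {}}};
  assert(CountIncoming(g, 0)[3] == 2);  // node 3 is a branch target twice over
}
```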
136 | 73 |
137 BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) { | 74 CodeGen::Addr CodeGen::Jump(Addr target) { |
138 // Iterate over all the instructions between "head" and "tail" and | 75 return Append(BPF_JMP | BPF_JA, Offset(target), 0, 0); |
139 // insert them into a new basic block. | |
140 BasicBlock* bb = new BasicBlock; | |
141 for (;; head = head->next) { | |
142 bb->instructions.push_back(head); | |
143 if (head == tail) { | |
144 break; | |
145 } | |
146 if (BPF_CLASS(head->code) == BPF_JMP) { | |
147 SANDBOX_DIE("Found a jump inside of a basic block"); | |
148 } | |
149 } | |
150 basic_blocks_.push_back(bb); | |
151 return bb; | |
152 } | 76 } |
153 | 77 |
154 void CodeGen::AddBasicBlock(Instruction* head, | 78 CodeGen::Addr CodeGen::Append(uint16_t code, |
155 Instruction* tail, | 79 uint32_t k, |
156 const BranchTargets& branch_targets, | 80 size_t jt, |
157 TargetsToBlocks* basic_blocks, | 81 size_t jf) { |
158 BasicBlock** firstBlock) { | 82 if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) { |
159 // Add a new basic block to "basic_blocks". Also set "firstBlock", if it | 83 CHECK_LT(jt, 256U); |
160 // has not been set before. | 84 CHECK_LT(jf, 256U); |
161 BranchTargets::const_iterator iter = branch_targets.find(head); | 85 } else { |
162 if ((iter == branch_targets.end()) != !*firstBlock || | 86 CHECK_EQ(0U, jt); |
163 !*firstBlock != basic_blocks->empty()) { | 87 CHECK_EQ(0U, jf); |
164 SANDBOX_DIE( | |
165 "Only the very first basic block should have no " | |
166 "incoming jumps"); | |
167 } | 88 } |
168 BasicBlock* bb = MakeBasicBlock(head, tail); | 89 |
169 if (!*firstBlock) { | 90 program_.push_back((sock_filter){code, jt, jf, k}); |
rickyz (no longer on Chrome), 2014/11/12 23:43:50:
Might be good to CHECK_LE(program_.size(), BPF_MAXINSNS) …
mdempsky, 2014/11/18 01:07:30:
Done. (I added a CHECK_LT before the push_back.)
| |
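For reference, BPF_MAXINSNS (4096 in linux/filter.h) is the kernel's cap on filter length. A guard along the lines this thread settles on, sketched here rather than quoted from the landed patch, might look like:

```cpp
#include <linux/filter.h>

#include <vector>

#include "base/logging.h"

// Sketch only: a length check before Append()'s push_back, in the spirit of
// the CHECK_LT mdempsky mentions; the landed code may differ in detail.
void CheckRoomForOneMore(const std::vector<sock_filter>& program) {
  CHECK_LT(program.size(), static_cast<size_t>(BPF_MAXINSNS));
}
```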
170 *firstBlock = bb; | 91 return program_.size() - 1; |
171 } | |
172 (*basic_blocks)[head] = bb; | |
173 return; | |
174 } | 92 } |
175 | 93 |
176 BasicBlock* CodeGen::CutGraphIntoBasicBlocks( | 94 void CodeGen::CheckValid(Addr addr) const { |
177 Instruction* instructions, | 95 CHECK_LT(addr, program_.size()); |
178 const BranchTargets& branch_targets, | |
179 TargetsToBlocks* basic_blocks) { | |
180 // Textbook implementation of a basic block generator. All basic blocks | |
181 // start with a branch target and end with either a return statement or | |
182 // a jump (or are followed by an instruction that forms the beginning of a | |
183 // new block). Both conditional and "always" jumps are supported. | |
184 BasicBlock* first_block = NULL; | |
185 std::set<const Instruction*> seen_instructions; | |
186 Instructions stack; | |
187 Instruction* tail = NULL; | |
188 Instruction* head = instructions; | |
189 for (Instruction* insn = head; insn;) { | |
190 if (seen_instructions.find(insn) != seen_instructions.end()) { | |
191 // We somehow went in a circle. This should never be possible. Not even | |
192 // cyclic graphs are supposed to confuse us this much. | |
193 SANDBOX_DIE("Internal compiler error; cannot compute basic blocks"); | |
194 } | |
195 seen_instructions.insert(insn); | |
196 if (tail && branch_targets.find(insn) != branch_targets.end()) { | |
197 // We reached a branch target. Start a new basic block (this means, | |
198 // flushing the previous basic block first). | |
199 AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block); | |
200 head = insn; | |
201 } | |
202 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
203 // We reached a jump instruction, this completes our current basic | |
204 // block. Flush it and continue by traversing both the true and the | |
205 // false branch of the jump. We need to maintain a stack to do so. | |
206 AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block); | |
207 if (BPF_OP(insn->code) != BPF_JA) { | |
208 stack.push_back(insn->jf_ptr); | |
209 } | |
210 insn = insn->jt_ptr; | |
211 | |
212 // If we are jumping to an instruction that we have previously | |
213 // processed, we are done with this branch. Continue by backtracking | |
214 // up the stack. | |
215 while (seen_instructions.find(insn) != seen_instructions.end()) { | |
216 backtracking: | |
217 if (stack.empty()) { | |
218 // We successfully traversed all reachable instructions. | |
219 return first_block; | |
220 } else { | |
221 // Going up the stack. | |
222 insn = stack.back(); | |
223 stack.pop_back(); | |
224 } | |
225 } | |
226 // Starting a new basic block. | |
227 tail = NULL; | |
228 head = insn; | |
229 } else { | |
230 // We found a non-jumping instruction, append it to current basic | |
231 // block. | |
232 tail = insn; | |
233 insn = insn->next; | |
234 if (!insn) { | |
235 // We reached a return statement, flush the current basic block and | |
236 // backtrack up the stack. | |
237 AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block); | |
238 goto backtracking; | |
239 } | |
240 } | |
241 } | |
242 return first_block; | |
243 } | 96 } |
244 | 97 |
245 // We define a comparator that inspects the sequence of instructions in our | 98 bool CodeGen::CacheKeyLess::operator()(const CacheKey& lhs, |
246 // basic block and any blocks referenced by this block. This function can be | 99 const CacheKey& rhs) const { |
247 // used in a "less" comparator for the purpose of storing pointers to basic | 100 if (lhs.a != rhs.a) |
248 // blocks in STL containers; this gives an easy option to use STL to find | 101 return lhs.a < rhs.a; |
249 // shared tail sequences of basic blocks. | 102 if (lhs.b != rhs.b) |
250 static int PointerCompare(const BasicBlock* block1, | 103 return lhs.b < rhs.b; |
251 const BasicBlock* block2, | 104 if (lhs.c != rhs.c) |
252 const TargetsToBlocks& blocks) { | 105 return lhs.c < rhs.c; |
253 // Return <0, 0, or >0 depending on the ordering of "block1" and "block2". | 106 if (lhs.d != rhs.d) |
254 // If we are looking at the exact same block, this is trivial and we don't | 107 return lhs.d < rhs.d; |
255 // need to do a full comparison. | 108 return false; |
256 if (block1 == block2) { | |
257 return 0; | |
258 } | |
259 | |
260 // We compare the sequence of instructions in both basic blocks. | |
261 const Instructions& insns1 = block1->instructions; | |
262 const Instructions& insns2 = block2->instructions; | |
263 // Basic blocks should never be empty. | |
264 CHECK(!insns1.empty()); | |
265 CHECK(!insns2.empty()); | |
266 | |
267 Instructions::const_iterator iter1 = insns1.begin(); | |
268 Instructions::const_iterator iter2 = insns2.begin(); | |
269 for (;; ++iter1, ++iter2) { | |
270 // If we have reached the end of the sequence of instructions in one or | |
271 // both basic blocks, we know the relative ordering between the two blocks | |
272 // and can return. | |
273 if (iter1 == insns1.end() || iter2 == insns2.end()) { | |
274 if (iter1 != insns1.end()) { | |
275 return 1; | |
276 } | |
277 if (iter2 != insns2.end()) { | |
278 return -1; | |
279 } | |
280 | |
281 // If the two blocks are the same length (and have elementwise-equal code | |
282 // and k fields) and their last instructions are neither a JMP nor a RET | |
283 // (which is the only way we can reach this point), then we must compare | |
284 // their successors. | |
285 Instruction* const insns1_last = insns1.back(); | |
286 Instruction* const insns2_last = insns2.back(); | |
287 CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP && | |
288 BPF_CLASS(insns1_last->code) != BPF_RET); | |
289 | |
290 // Non jumping instructions will always have a valid next instruction. | |
291 CHECK(insns1_last->next); | |
292 CHECK(insns2_last->next); | |
293 return PointerCompare(blocks.find(insns1_last->next)->second, | |
294 blocks.find(insns2_last->next)->second, | |
295 blocks); | |
296 } | |
297 | |
298 // Compare the individual fields for both instructions. | |
299 const Instruction& insn1 = **iter1; | |
300 const Instruction& insn2 = **iter2; | |
301 if (insn1.code != insn2.code) { | |
302 return insn1.code - insn2.code; | |
303 } | |
304 if (insn1.k != insn2.k) { | |
305 return insn1.k - insn2.k; | |
306 } | |
307 | |
308 // Sanity check: If we're looking at a JMP or RET instruction, by definition | |
309 // it should be the last instruction of the basic block. | |
310 if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) { | |
311 CHECK_EQ(insns1.back(), &insn1); | |
312 CHECK_EQ(insns2.back(), &insn2); | |
313 } | |
314 | |
315 // RET instructions terminate execution, and only JMP instructions use the | |
316 // jt_ptr and jf_ptr fields. Anything else can continue to the next | |
317 // instruction in the basic block. | |
318 if (BPF_CLASS(insn1.code) == BPF_RET) { | |
319 return 0; | |
320 } else if (BPF_CLASS(insn1.code) != BPF_JMP) { | |
321 continue; | |
322 } | |
323 | |
324 // Recursively compare the "true" and "false" branches. | |
325 // A well-formed BPF program can't have any cycles, so we know | |
326 // that our recursive algorithm will ultimately terminate. | |
327 // In the unlikely event that the programmer made a mistake and | |
328 // went out of the way to give us a cyclic program, we will crash | |
329 // with a stack overflow. We are OK with that. | |
330 if (BPF_OP(insn1.code) != BPF_JA) { | |
331 int c = PointerCompare(blocks.find(insn1.jf_ptr)->second, | |
332 blocks.find(insn2.jf_ptr)->second, | |
333 blocks); | |
334 if (c != 0) { | |
335 return c; | |
336 } | |
337 } | |
338 return PointerCompare(blocks.find(insn1.jt_ptr)->second, | |
339 blocks.find(insn2.jt_ptr)->second, | |
340 blocks); | |
341 } | |
342 } | |
343 | |
344 void CodeGen::MergeTails(TargetsToBlocks* blocks) { | |
345 // We enter all of our basic blocks into a set using the BasicBlock::Less() | |
346 // comparator. This naturally results in blocks with identical tails of | |
347 // instructions to map to the same entry in the set. Whenever we discover | |
348 // that a particular chain of instructions is already in the set, we merge | |
349 // the basic blocks and update the pointer in the "blocks" map. | |
350 // Returns the number of unique basic blocks. | |
351 // N.B. We don't merge instructions on a granularity that is finer than | |
352 // a basic block. In practice, this is sufficiently rare that we don't | |
353 // incur a big cost. | |
354 // Similarly, we currently don't merge anything other than tails. In | |
355 // the future, we might decide to revisit this decision and attempt to | |
356 // merge arbitrary sub-sequences of instructions. | |
357 BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare); | |
358 typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set; | |
359 Set seen_basic_blocks(less); | |
360 for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end(); | |
361 ++iter) { | |
362 BasicBlock* bb = iter->second; | |
363 Set::const_iterator entry = seen_basic_blocks.find(bb); | |
364 if (entry == seen_basic_blocks.end()) { | |
365 // This is the first time we see this particular sequence of | |
366 // instructions. Enter the basic block into the set of known | |
367 // basic blocks. | |
368 seen_basic_blocks.insert(bb); | |
369 } else { | |
370 // We have previously seen another basic block that defines the same | |
371 // sequence of instructions. Merge the two blocks and update the | |
372 // pointer in the "blocks" map. | |
373 iter->second = *entry; | |
374 } | |
375 } | |
376 } | |
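MergeTails leans on a standard-library property worth spelling out: a std::set with a custom comparator treats elements that compare equivalent as duplicates, and a failed insert hands back the surviving representative. A reduced illustration with strings standing in for instruction sequences:

```cpp
#include <cassert>
#include <set>
#include <string>

// Reduced sketch: a set whose comparator looks only at the trailing character
// of each string, so strings with the same "tail" collapse to one entry.
struct TailLess {
  bool operator()(const std::string& a, const std::string& b) const {
    return a.back() < b.back();
  }
};

int main() {
  std::set<std::string, TailLess> seen;
  seen.insert("ret0");           // first block with tail '0' wins
  auto r = seen.insert("jmp0");  // same tail: insert is a no-op...
  assert(!r.second);             // ...and r.first points at "ret0", the
  assert(*r.first == "ret0");    // representative a caller would reuse
}
```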
377 | |
378 void CodeGen::ComputeIncomingBranches(BasicBlock* block, | |
379 const TargetsToBlocks& targets_to_blocks, | |
380 IncomingBranches* incoming_branches) { | |
381 // We increment the number of incoming branches each time we encounter a | |
382 // basic block. But we only traverse recursively the very first time we | |
383 // encounter a new block. This is necessary to make topological sorting | |
384 // work correctly. | |
385 if (++(*incoming_branches)[block] == 1) { | |
386 Instruction* last_insn = block->instructions.back(); | |
387 if (BPF_CLASS(last_insn->code) == BPF_JMP) { | |
388 ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second, | |
389 targets_to_blocks, | |
390 incoming_branches); | |
391 if (BPF_OP(last_insn->code) != BPF_JA) { | |
392 ComputeIncomingBranches( | |
393 targets_to_blocks.find(last_insn->jf_ptr)->second, | |
394 targets_to_blocks, | |
395 incoming_branches); | |
396 } | |
397 } else if (BPF_CLASS(last_insn->code) != BPF_RET) { | |
398 ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second, | |
399 targets_to_blocks, | |
400 incoming_branches); | |
401 } | |
402 } | |
403 } | |
404 | |
405 void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block, | |
406 const TargetsToBlocks& blocks, | |
407 BasicBlocks* basic_blocks) { | |
408 // Textbook implementation of a toposort. We keep looking for basic blocks | |
409 // that don't have any incoming branches (initially, this is just the | |
410 // "first_block") and add them to the topologically sorted list of | |
411 // "basic_blocks". As we do so, we remove outgoing branches. This potentially | |
412 // ends up making our descendants eligible for the sorted list. The | |
413 // sorting algorithm terminates when there are no more basic blocks that have | |
414 // no incoming branches. If we didn't move all blocks from the set of | |
415 // "unordered_blocks" to the sorted list of "basic_blocks", there must have | |
416 // been a cyclic dependency. This should never happen in a BPF program, as | |
417 // well-formed BPF programs only ever have forward branches. | |
418 IncomingBranches unordered_blocks; | |
419 ComputeIncomingBranches(first_block, blocks, &unordered_blocks); | |
420 | |
421 std::set<BasicBlock*> heads; | |
422 for (;;) { | |
423 // Move block from "unordered_blocks" to "basic_blocks". | |
424 basic_blocks->push_back(first_block); | |
425 | |
426 // Inspect last instruction in the basic block. This is typically either a | |
427 // jump or a return statement. But it could also be a "normal" instruction | |
428 // that is followed by a jump target. | |
429 Instruction* last_insn = first_block->instructions.back(); | |
430 if (BPF_CLASS(last_insn->code) == BPF_JMP) { | |
431 // Remove outgoing branches. This might end up moving our descendants | |
432 // into set of "head" nodes that no longer have any incoming branches. | |
433 TargetsToBlocks::const_iterator iter; | |
434 if (BPF_OP(last_insn->code) != BPF_JA) { | |
435 iter = blocks.find(last_insn->jf_ptr); | |
436 if (!--unordered_blocks[iter->second]) { | |
437 heads.insert(iter->second); | |
438 } | |
439 } | |
440 iter = blocks.find(last_insn->jt_ptr); | |
441 if (!--unordered_blocks[iter->second]) { | |
442 first_block = iter->second; | |
443 continue; | |
444 } | |
445 } else if (BPF_CLASS(last_insn->code) != BPF_RET) { | |
446 // We encountered an instruction that doesn't change code flow. Try to | |
447 // pick the next "first_block" from "last_insn->next", if possible. | |
448 TargetsToBlocks::const_iterator iter; | |
449 iter = blocks.find(last_insn->next); | |
450 if (!--unordered_blocks[iter->second]) { | |
451 first_block = iter->second; | |
452 continue; | |
453 } else { | |
454 // Our basic block is supposed to be followed by "last_insn->next", | |
455 // but dependencies prevent this from happening. Insert a BPF_JA | |
456 // instruction to correct the code flow. | |
457 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next); | |
458 first_block->instructions.push_back(ja); | |
459 last_insn->next = ja; | |
460 } | |
461 } | |
462 if (heads.empty()) { | |
463 if (unordered_blocks.size() != basic_blocks->size()) { | |
464 SANDBOX_DIE("Internal compiler error; cyclic graph detected"); | |
465 } | |
466 return; | |
467 } | |
468 // Proceed by picking an arbitrary node from the set of basic blocks that | |
469 // do not have any incoming branches. | |
470 first_block = *heads.begin(); | |
471 heads.erase(heads.begin()); | |
472 } | |
473 } | |
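The toposort deleted above is essentially Kahn's algorithm: count incoming edges, repeatedly emit a node whose count is zero, and decrement its successors. A compact generic version for comparison (node type and graph are invented for the example):

```cpp
#include <cassert>
#include <map>
#include <vector>

// Kahn's algorithm over a tiny adjacency list; leftover nonzero counts would
// indicate a cycle, which well-formed BPF (forward branches only) never has.
std::vector<int> TopoSort(const std::map<int, std::vector<int>>& succ) {
  std::map<int, int> incoming;
  for (const auto& n : succ)
    for (int s : n.second) ++incoming[s];
  std::vector<int> ready, order;
  for (const auto& n : succ)
    if (incoming[n.first] == 0) ready.push_back(n.first);
  while (!ready.empty()) {
    int n = ready.back();
    ready.pop_back();
    order.push_back(n);
    for (int s : succ.at(n))
      if (--incoming[s] == 0) ready.push_back(s);
  }
  return order;
}

int main() {
  // 0 -> {1, 2}, 1 -> {2}, 2 -> {}.
  std::map<int, std::vector<int>> g = {{0, {1, 2}}, {1, {2}}, {2, {}}};
  std::vector<int> order = TopoSort(g);
  assert(order.size() == 3 && order[0] == 0 && order.back() == 2);
}
```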
474 | |
475 void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks, | |
476 const TargetsToBlocks& targets_to_blocks) { | |
477 // While we previously used pointers in jt_ptr and jf_ptr to link jump | |
478 // instructions to their targets, we now convert these jumps to relative | |
479 // jumps that are suitable for loading the BPF program into the kernel. | |
480 int offset = 0; | |
481 | |
482 // Since we just completed a toposort, all jump targets are guaranteed to | |
483 // go forward. This means, iterating over the basic blocks in reverse makes | |
484 // it trivial to compute the correct offsets. | |
485 BasicBlock* bb = NULL; | |
486 BasicBlock* last_bb = NULL; | |
487 for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin(); | |
488 iter != basic_blocks->rend(); | |
489 ++iter) { | |
490 last_bb = bb; | |
491 bb = *iter; | |
492 Instruction* insn = bb->instructions.back(); | |
493 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
494 // Basic block ended in a jump instruction. We can now compute the | |
495 // appropriate offsets. | |
496 if (BPF_OP(insn->code) == BPF_JA) { | |
497 // "Always" jumps use the 32bit "k" field for the offset, instead | |
498 // of the 8bit "jt" and "jf" fields. | |
499 int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset; | |
500 insn->k = jmp; | |
501 insn->jt = insn->jf = 0; | |
502 } else { | |
503 // The offset computations for conditional jumps are just the same | |
504 // as for "always" jumps. | |
505 int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset; | |
506 int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset; | |
507 | |
508 // There is an added complication, because conditional relative jumps | |
509 // can only jump at most 255 instructions forward. If we have to jump | |
510 // further, insert an extra "always" jump. | |
511 Instructions::size_type jmp = bb->instructions.size(); | |
512 if (jt > 255 || (jt == 255 && jf > 255)) { | |
513 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr); | |
514 bb->instructions.push_back(ja); | |
515 ja->k = jt; | |
516 ja->jt = ja->jf = 0; | |
517 | |
518 // The newly inserted "always" jump, of course, requires us to adjust | |
519 // the jump targets in the original conditional jump. | |
520 jt = 0; | |
521 ++jf; | |
522 } | |
523 if (jf > 255) { | |
524 Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr); | |
525 bb->instructions.insert(bb->instructions.begin() + jmp, ja); | |
526 ja->k = jf; | |
527 ja->jt = ja->jf = 0; | |
528 | |
529 // Again, we have to adjust the jump targets in the original | |
530 // conditional jump. | |
531 ++jt; | |
532 jf = 0; | |
533 } | |
534 | |
535 // Now we can finally set the relative jump targets in the conditional | |
536 // jump instruction. Afterwards, we must no longer access the jt_ptr | |
537 // and jf_ptr fields. | |
538 insn->jt = jt; | |
539 insn->jf = jf; | |
540 } | |
541 } else if (BPF_CLASS(insn->code) != BPF_RET && | |
542 targets_to_blocks.find(insn->next)->second != last_bb) { | |
543 SANDBOX_DIE("Internal compiler error; invalid basic block encountered"); | |
544 } | |
545 | |
546 // Proceed to next basic block. | |
547 offset += bb->instructions.size(); | |
548 bb->offset = offset; | |
549 } | |
550 return; | |
551 } | |
552 | |
553 void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks, | |
554 Program* program) { | |
555 // Our basic blocks have been sorted and relative jump offsets have been | |
556 // computed. The last remaining step is for all the instructions in our | |
557 // basic blocks to be concatenated into a BPF program. | |
558 program->clear(); | |
559 for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin(); | |
560 bb_iter != basic_blocks.end(); | |
561 ++bb_iter) { | |
562 const BasicBlock& bb = **bb_iter; | |
563 for (Instructions::const_iterator insn_iter = bb.instructions.begin(); | |
564 insn_iter != bb.instructions.end(); | |
565 ++insn_iter) { | |
566 const Instruction& insn = **insn_iter; | |
567 program->push_back( | |
568 (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k}); | |
569 } | |
570 } | |
571 return; | |
572 } | |
573 | |
574 void CodeGen::Compile(Instruction* instructions, Program* program) { | |
575 if (compiled_) { | |
576 SANDBOX_DIE( | |
577 "Cannot call Compile() multiple times. Create a new code " | |
578 "generator instead"); | |
579 } | |
580 compiled_ = true; | |
581 | |
582 BranchTargets branch_targets; | |
583 FindBranchTargets(*instructions, &branch_targets); | |
584 TargetsToBlocks all_blocks; | |
585 BasicBlock* first_block = | |
586 CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks); | |
587 MergeTails(&all_blocks); | |
588 BasicBlocks basic_blocks; | |
589 TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks); | |
590 ComputeRelativeJumps(&basic_blocks, all_blocks); | |
591 ConcatenateBasicBlocks(basic_blocks, program); | |
592 return; | |
593 } | 109 } |
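A small observation on the new CacheKeyLess: the hand-rolled chain of comparisons is the standard lexicographic pattern, and std::tie yields the same strict weak ordering in one line. A sketch assuming the same four members:

```cpp
#include <tuple>

// Equivalent ordering via std::tie (sketch; field names a/b/c/d as above).
struct Key {
  unsigned a, b, c, d;
};

bool Less(const Key& lhs, const Key& rhs) {
  return std::tie(lhs.a, lhs.b, lhs.c, lhs.d) <
         std::tie(rhs.a, rhs.b, rhs.c, rhs.d);
}
```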
594 | 110 |
595 } // namespace sandbox | 111 } // namespace sandbox |