sandbox/linux/seccomp-bpf/codegen_unittest.cc - Issue 888093005: sandbox: move Codegen into bpf_dsl directory

Side by Side Diff: sandbox/linux/seccomp-bpf/codegen_unittest.cc

Issue 888093005: sandbox: move Codegen into bpf_dsl directory (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "sandbox/linux/seccomp-bpf/codegen.h"

6

7 #include <linux/filter.h>

8

9 #include <map>

10 #include <utility>

11 #include <vector>

12

13 #include "base/macros.h"

14 #include "base/md5.h"

15 #include "base/strings/string_piece.h"

16 #include "testing/gtest/include/gtest/gtest.h"

17

18 namespace sandbox {

19 namespace {

20

21 // Hash provides an abstraction for building "hash trees" from BPF

22 // control flow graphs, and efficiently identifying equivalent graphs.

23 //

24 // For simplicity, we use MD5, because base happens to provide a

25 // convenient API for its use. However, any collision-resistant hash

26 // should suffice.

27 class Hash {

28 public:

29 static const Hash kZero;

30

31 Hash() : digest_() {}

32

33 Hash(uint16_t code,

34 uint32_t k,

35 const Hash& jt = kZero,

36 const Hash& jf = kZero)

37 : digest_() {

38 base::MD5Context ctx;

39 base::MD5Init(&ctx);

40 HashValue(&ctx, code);

41 HashValue(&ctx, k);

42 HashValue(&ctx, jt);

43 HashValue(&ctx, jf);

44 base::MD5Final(&digest_, &ctx);

45 }

46

47 Hash(const Hash& hash) = default;

48 Hash& operator=(const Hash& rhs) = default;

49

50 friend bool operator==(const Hash& lhs, const Hash& rhs) {

51 return lhs.Base16() == rhs.Base16();

52 }

53 friend bool operator!=(const Hash& lhs, const Hash& rhs) {

54 return !(lhs == rhs);

55 }

56

57 private:

58 template <typename T>

59 void HashValue(base::MD5Context* ctx, const T& value) {

60 base::MD5Update(ctx,

61 base::StringPiece(reinterpret_cast<const char*>(&value),

62 sizeof(value)));

63 }

64

65 std::string Base16() const {

66 return MD5DigestToBase16(digest_);

67 }

68

69 base::MD5Digest digest_;

70 };

71

72 const Hash Hash::kZero;

73

74 // Sanity check that equality and inequality work on Hash as required.

75 TEST(CodeGen, HashSanity) {

76 std::vector<Hash> hashes;

77

78 // Push a bunch of logically distinct hashes.

79 hashes.push_back(Hash::kZero);

80 for (int i = 0; i < 4; ++i) {

81 hashes.push_back(Hash(i & 1, i & 2));

82 }

83 for (int i = 0; i < 16; ++i) {

84 hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8)));

85 }

86 for (int i = 0; i < 64; ++i) {

87 hashes.push_back(

88 Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32)));

89 }

90

91 for (const Hash& a : hashes) {

92 for (const Hash& b : hashes) {

93 // Hashes should equal themselves, but not equal all others.

94 if (&a == &b) {

95 EXPECT_EQ(a, b);

96 } else {

97 EXPECT_NE(a, b);

98 }

99 }

100 }

101 }

102

103 // ProgramTest provides a fixture for writing compiling sample

104 // programs with CodeGen and verifying the linearized output matches

105 // the input DAG.

106 class ProgramTest : public ::testing::Test {

107 protected:

108 ProgramTest() : gen_(), node_hashes_() {}

109

110 // MakeInstruction calls CodeGen::MakeInstruction() and associated

111 // the returned address with a hash of the instruction.

112 CodeGen::Node MakeInstruction(uint16_t code,

113 uint32_t k,

114 CodeGen::Node jt = CodeGen::kNullNode,

115 CodeGen::Node jf = CodeGen::kNullNode) {

116 CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf);

117 EXPECT_NE(CodeGen::kNullNode, res);

118

119 Hash digest(code, k, Lookup(jt), Lookup(jf));

120 auto it = node_hashes_.insert(std::make_pair(res, digest));

121 EXPECT_EQ(digest, it.first->second);

122

123 return res;

124 }

125

126 // RunTest compiles the program and verifies that the output matches

127 // what is expected. It should be called at the end of each program

128 // test case.

129 void RunTest(CodeGen::Node head) {

130 // Compile the program

131 CodeGen::Program program;

132 gen_.Compile(head, &program);

133

134 // Walk the program backwards, and compute the hash for each instruction.

135 std::vector<Hash> prog_hashes(program.size());

136 for (size_t i = program.size(); i > 0; --i) {

137 const sock_filter& insn = program.at(i - 1);

138 Hash& hash = prog_hashes.at(i - 1);

139

140 if (BPF_CLASS(insn.code) == BPF_JMP) {

141 if (BPF_OP(insn.code) == BPF_JA) {

142 // The compiler adds JA instructions as needed, so skip them.

143 hash = prog_hashes.at(i + insn.k);

144 } else {

145 hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt),

146 prog_hashes.at(i + insn.jf));

147 }

148 } else if (BPF_CLASS(insn.code) == BPF_RET) {

149 hash = Hash(insn.code, insn.k);

150 } else {

151 hash = Hash(insn.code, insn.k, prog_hashes.at(i));

152 }

153 }

154

155 EXPECT_EQ(Lookup(head), prog_hashes.at(0));

156 }

157

158 private:

159 const Hash& Lookup(CodeGen::Node next) const {

160 if (next == CodeGen::kNullNode) {

161 return Hash::kZero;

162 }

163 auto it = node_hashes_.find(next);

164 if (it == node_hashes_.end()) {

165 ADD_FAILURE() << "No hash found for node " << next;

166 return Hash::kZero;

167 }

168 return it->second;

169 }

170

171 CodeGen gen_;

172 std::map<CodeGen::Node, Hash> node_hashes_;

173

174 DISALLOW_COPY_AND_ASSIGN(ProgramTest);

175 };

176

177 TEST_F(ProgramTest, OneInstruction) {

178 // Create the most basic valid BPF program:

179 // RET 0

180 CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0);

181 RunTest(head);

182 }

183

184 TEST_F(ProgramTest, SimpleBranch) {

185 // Create a program with a single branch:

186 // JUMP if eq 42 then $0 else $1

187 // 0: RET 1

188 // 1: RET 0

189 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42,

190 MakeInstruction(BPF_RET + BPF_K, 1),

191 MakeInstruction(BPF_RET + BPF_K, 0));

192 RunTest(head);

193 }

194

195 TEST_F(ProgramTest, AtypicalBranch) {

196 // Create a program with a single branch:

197 // JUMP if eq 42 then $0 else $0

198 // 0: RET 0

199

200 CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0);

201 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret);

202

203 // N.B.: As the instructions in both sides of the branch are already

204 // the same object, we do not actually have any "mergeable" branches.

205 // This needs to be reflected in our choice of "flags".

206 RunTest(head);

207 }

208

209 TEST_F(ProgramTest, Complex) {

210 // Creates a basic BPF program that we'll use to test some of the code:

211 // JUMP if eq 42 the $0 else $1 (insn6)

212 // 0: LD 23 (insn5)

213 // 1: JUMP if eq 42 then $2 else $4 (insn4)

214 // 2: JUMP to $3 (insn2)

215 // 3: LD 42 (insn1)

216 // RET 42 (insn0)

217 // 4: LD 42 (insn3)

218 // RET 42 (insn3+)

219 CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42);

220 CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0);

221 CodeGen::Node insn2 = insn1; // Implicit JUMP

222

223 // We explicitly duplicate instructions to test that they're merged.

224 CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42,

225 MakeInstruction(BPF_RET + BPF_K, 42));

226 EXPECT_EQ(insn2, insn3);

227

228 CodeGen::Node insn4 =

229 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3);

230 CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4);

231

232 // Force a basic block that ends in neither a jump instruction nor a return

233 // instruction. It only contains "insn5". This exercises one of the less

234 // common code paths in the topo-sort algorithm.

235 // This also gives us a diamond-shaped pattern in our graph, which stresses

236 // another aspect of the topo-sort algorithm (namely, the ability to

237 // correctly count the incoming branches for subtrees that are not disjunct).

238 CodeGen::Node insn6 =

239 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4);

240

241 RunTest(insn6);

242 }

243

244 TEST_F(ProgramTest, ConfusingTails) {

245 // This simple program demonstrates https://crbug.com/351103/

246 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could

247 // be tempted to merge them since they are the same. However, they are

248 // not mergeable because they fall-through to non semantically equivalent

249 // blocks.

250 // Without the fix for this bug, this program should trigger the check in

251 // CompileAndCompare: the serialized graphs from the program and its compiled

252 // version will differ.

253 //

254 // 0) LOAD 1 // ???

255 // 1) if A == 0x1; then JMP 2 else JMP 3

256 // 2) LOAD 0 // System call number

257 // 3) if A == 0x2; then JMP 4 else JMP 5

258 // 4) LOAD 0 // System call number

259 // 5) if A == 0x1; then JMP 6 else JMP 7

260 // 6) RET 0

261 // 7) RET 1

262

263 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);

264 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);

265 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);

266 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);

267 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);

268 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);

269 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);

270 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);

271

272 RunTest(i0);

273 }

274

275 TEST_F(ProgramTest, ConfusingTailsBasic) {

276 // Without the fix for https://crbug.com/351103/, (see

277 // SampleProgramConfusingTails()), this would generate a cyclic graph and

278 // crash as the two "LOAD 0" instructions would get merged.

279 //

280 // 0) LOAD 1 // ???

281 // 1) if A == 0x1; then JMP 2 else JMP 3

282 // 2) LOAD 0 // System call number

283 // 3) if A == 0x2; then JMP 4 else JMP 5

284 // 4) LOAD 0 // System call number

285 // 5) RET 1

286

287 CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1);

288 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);

289 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);

290 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);

291 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);

292 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);

293

294 RunTest(i0);

295 }

296

297 TEST_F(ProgramTest, ConfusingTailsMergeable) {

298 // This is similar to SampleProgramConfusingTails(), except that

299 // instructions 2 and 4 are now RET instructions.

300 // In PointerCompare(), this exercises the path where two blocks are of the

301 // same length and identical and the last instruction is a JMP or RET, so the

302 // following blocks don't need to be looked at and the blocks are mergeable.

303 //

304 // 0) LOAD 1 // ???

305 // 1) if A == 0x1; then JMP 2 else JMP 3

306 // 2) RET 42

307 // 3) if A == 0x2; then JMP 4 else JMP 5

308 // 4) RET 42

309 // 5) if A == 0x1; then JMP 6 else JMP 7

310 // 6) RET 0

311 // 7) RET 1

312

313 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);

314 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);

315 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);

316 CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42);

317 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);

318 CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42);

319 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);

320 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);

321

322 RunTest(i0);

323 }

324

325 TEST_F(ProgramTest, InstructionFolding) {

326 // Check that simple instructions are folded as expected.

327 CodeGen::Node a = MakeInstruction(BPF_RET + BPF_K, 0);

328 EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));

329 CodeGen::Node b = MakeInstruction(BPF_RET + BPF_K, 1);

330 EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));

331 EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));

332 EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));

333

334 // Check that complex sequences are folded too.

335 CodeGen::Node c =

336 MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0,

337 MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b));

338 EXPECT_EQ(c, MakeInstruction(

339 BPF_LD + BPF_W + BPF_ABS, 0,

340 MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b)));

341

342 RunTest(c);

343 }

344

345 TEST_F(ProgramTest, FarBranches) {

346 // BPF instructions use 8-bit fields for branch offsets, which means

347 // branch targets must be within 255 instructions of the branch

348 // instruction. CodeGen abstracts away this detail by inserting jump

349 // instructions as needed, which we test here by generating programs

350 // that should trigger any interesting boundary conditions.

351

352 // Populate with 260 initial instruction nodes.

353 std::vector<CodeGen::Node> nodes;

354 nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));

355 for (size_t i = 1; i < 260; ++i) {

356 nodes.push_back(

357 MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));

358 }

359

360 // Exhaustively test branch offsets near BPF's limits.

361 for (size_t jt = 250; jt < 260; ++jt) {

362 for (size_t jf = 250; jf < 260; ++jf) {

363 nodes.push_back(MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0,

364 nodes.rbegin()[jt], nodes.rbegin()[jf]));

365 RunTest(nodes.back());

366 }

367 }

368 }

369

370 TEST_F(ProgramTest, JumpReuse) {

371 // As a code size optimization, we try to reuse jumps when possible

372 // instead of emitting new ones. Here we make sure that optimization

373 // is working as intended.

374 //

375 // NOTE: To simplify testing, we rely on implementation details

376 // about what CodeGen::Node values indicate (i.e., vector indices),

377 // but CodeGen users should treat them as opaque values.

378

379 // Populate with 260 initial instruction nodes.

380 std::vector<CodeGen::Node> nodes;

381 nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));

382 for (size_t i = 1; i < 260; ++i) {

383 nodes.push_back(

384 MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));

385 }

386

387 // Branching to nodes[0] and nodes[1] should require 3 new

388 // instructions: two far jumps plus the branch itself.

389 CodeGen::Node one =

390 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, nodes[0], nodes[1]);

391 EXPECT_EQ(nodes.back() + 3, one); // XXX: Implementation detail!

392 RunTest(one);

393

394 // Branching again to the same target nodes should require only one

395 // new instruction, as we can reuse the previous branch's jumps.

396 CodeGen::Node two =

397 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, nodes[0], nodes[1]);

398 EXPECT_EQ(one + 1, two); // XXX: Implementation detail!

399 RunTest(two);

400 }

401

402 } // namespace

403 } // namespace sandbox

OLD	NEW

« no previous file with comments | « sandbox/linux/seccomp-bpf/codegen.cc ('k') | sandbox/linux/seccomp-bpf/sandbox_bpf.h » ('j') | no next file with comments »