OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "sandbox/linux/seccomp-bpf/codegen.h" | |
6 | |
7 #include <linux/filter.h> | |
8 | |
9 #include <map> | |
10 #include <utility> | |
11 #include <vector> | |
12 | |
13 #include "base/macros.h" | |
14 #include "base/md5.h" | |
15 #include "base/strings/string_piece.h" | |
16 #include "testing/gtest/include/gtest/gtest.h" | |
17 | |
18 namespace sandbox { | |
19 namespace { | |
20 | |
21 // Hash provides an abstraction for building "hash trees" from BPF | |
22 // control flow graphs, and efficiently identifying equivalent graphs. | |
23 // | |
24 // For simplicity, we use MD5, because base happens to provide a | |
25 // convenient API for its use. However, any collision-resistant hash | |
26 // should suffice. | |
27 class Hash { | |
28 public: | |
29 static const Hash kZero; | |
30 | |
31 Hash() : digest_() {} | |
32 | |
33 Hash(uint16_t code, | |
34 uint32_t k, | |
35 const Hash& jt = kZero, | |
36 const Hash& jf = kZero) | |
37 : digest_() { | |
38 base::MD5Context ctx; | |
39 base::MD5Init(&ctx); | |
40 HashValue(&ctx, code); | |
41 HashValue(&ctx, k); | |
42 HashValue(&ctx, jt); | |
43 HashValue(&ctx, jf); | |
44 base::MD5Final(&digest_, &ctx); | |
45 } | |
46 | |
47 Hash(const Hash& hash) = default; | |
48 Hash& operator=(const Hash& rhs) = default; | |
49 | |
50 friend bool operator==(const Hash& lhs, const Hash& rhs) { | |
51 return lhs.Base16() == rhs.Base16(); | |
52 } | |
53 friend bool operator!=(const Hash& lhs, const Hash& rhs) { | |
54 return !(lhs == rhs); | |
55 } | |
56 | |
57 private: | |
58 template <typename T> | |
59 void HashValue(base::MD5Context* ctx, const T& value) { | |
60 base::MD5Update(ctx, | |
61 base::StringPiece(reinterpret_cast<const char*>(&value), | |
62 sizeof(value))); | |
63 } | |
64 | |
65 std::string Base16() const { | |
66 return MD5DigestToBase16(digest_); | |
67 } | |
68 | |
69 base::MD5Digest digest_; | |
70 }; | |
71 | |
72 const Hash Hash::kZero; | |
73 | |
74 // Sanity check that equality and inequality work on Hash as required. | |
75 TEST(CodeGen, HashSanity) { | |
76 std::vector<Hash> hashes; | |
77 | |
78 // Push a bunch of logically distinct hashes. | |
79 hashes.push_back(Hash::kZero); | |
80 for (int i = 0; i < 4; ++i) { | |
81 hashes.push_back(Hash(i & 1, i & 2)); | |
82 } | |
83 for (int i = 0; i < 16; ++i) { | |
84 hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8))); | |
85 } | |
86 for (int i = 0; i < 64; ++i) { | |
87 hashes.push_back( | |
88 Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32))); | |
89 } | |
90 | |
91 for (const Hash& a : hashes) { | |
92 for (const Hash& b : hashes) { | |
93 // Hashes should equal themselves, but not equal all others. | |
94 if (&a == &b) { | |
95 EXPECT_EQ(a, b); | |
96 } else { | |
97 EXPECT_NE(a, b); | |
98 } | |
99 } | |
100 } | |
101 } | |
102 | |
103 // ProgramTest provides a fixture for writing compiling sample | |
104 // programs with CodeGen and verifying the linearized output matches | |
105 // the input DAG. | |
106 class ProgramTest : public ::testing::Test { | |
107 protected: | |
108 ProgramTest() : gen_(), node_hashes_() {} | |
109 | |
110 // MakeInstruction calls CodeGen::MakeInstruction() and associated | |
111 // the returned address with a hash of the instruction. | |
112 CodeGen::Node MakeInstruction(uint16_t code, | |
113 uint32_t k, | |
114 CodeGen::Node jt = CodeGen::kNullNode, | |
115 CodeGen::Node jf = CodeGen::kNullNode) { | |
116 CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf); | |
117 EXPECT_NE(CodeGen::kNullNode, res); | |
118 | |
119 Hash digest(code, k, Lookup(jt), Lookup(jf)); | |
120 auto it = node_hashes_.insert(std::make_pair(res, digest)); | |
121 EXPECT_EQ(digest, it.first->second); | |
122 | |
123 return res; | |
124 } | |
125 | |
126 // RunTest compiles the program and verifies that the output matches | |
127 // what is expected. It should be called at the end of each program | |
128 // test case. | |
129 void RunTest(CodeGen::Node head) { | |
130 // Compile the program | |
131 CodeGen::Program program; | |
132 gen_.Compile(head, &program); | |
133 | |
134 // Walk the program backwards, and compute the hash for each instruction. | |
135 std::vector<Hash> prog_hashes(program.size()); | |
136 for (size_t i = program.size(); i > 0; --i) { | |
137 const sock_filter& insn = program.at(i - 1); | |
138 Hash& hash = prog_hashes.at(i - 1); | |
139 | |
140 if (BPF_CLASS(insn.code) == BPF_JMP) { | |
141 if (BPF_OP(insn.code) == BPF_JA) { | |
142 // The compiler adds JA instructions as needed, so skip them. | |
143 hash = prog_hashes.at(i + insn.k); | |
144 } else { | |
145 hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt), | |
146 prog_hashes.at(i + insn.jf)); | |
147 } | |
148 } else if (BPF_CLASS(insn.code) == BPF_RET) { | |
149 hash = Hash(insn.code, insn.k); | |
150 } else { | |
151 hash = Hash(insn.code, insn.k, prog_hashes.at(i)); | |
152 } | |
153 } | |
154 | |
155 EXPECT_EQ(Lookup(head), prog_hashes.at(0)); | |
156 } | |
157 | |
158 private: | |
159 const Hash& Lookup(CodeGen::Node next) const { | |
160 if (next == CodeGen::kNullNode) { | |
161 return Hash::kZero; | |
162 } | |
163 auto it = node_hashes_.find(next); | |
164 if (it == node_hashes_.end()) { | |
165 ADD_FAILURE() << "No hash found for node " << next; | |
166 return Hash::kZero; | |
167 } | |
168 return it->second; | |
169 } | |
170 | |
171 CodeGen gen_; | |
172 std::map<CodeGen::Node, Hash> node_hashes_; | |
173 | |
174 DISALLOW_COPY_AND_ASSIGN(ProgramTest); | |
175 }; | |
176 | |
177 TEST_F(ProgramTest, OneInstruction) { | |
178 // Create the most basic valid BPF program: | |
179 // RET 0 | |
180 CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0); | |
181 RunTest(head); | |
182 } | |
183 | |
184 TEST_F(ProgramTest, SimpleBranch) { | |
185 // Create a program with a single branch: | |
186 // JUMP if eq 42 then $0 else $1 | |
187 // 0: RET 1 | |
188 // 1: RET 0 | |
189 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, | |
190 MakeInstruction(BPF_RET + BPF_K, 1), | |
191 MakeInstruction(BPF_RET + BPF_K, 0)); | |
192 RunTest(head); | |
193 } | |
194 | |
195 TEST_F(ProgramTest, AtypicalBranch) { | |
196 // Create a program with a single branch: | |
197 // JUMP if eq 42 then $0 else $0 | |
198 // 0: RET 0 | |
199 | |
200 CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0); | |
201 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret); | |
202 | |
203 // N.B.: As the instructions in both sides of the branch are already | |
204 // the same object, we do not actually have any "mergeable" branches. | |
205 // This needs to be reflected in our choice of "flags". | |
206 RunTest(head); | |
207 } | |
208 | |
209 TEST_F(ProgramTest, Complex) { | |
210 // Creates a basic BPF program that we'll use to test some of the code: | |
211 // JUMP if eq 42 the $0 else $1 (insn6) | |
212 // 0: LD 23 (insn5) | |
213 // 1: JUMP if eq 42 then $2 else $4 (insn4) | |
214 // 2: JUMP to $3 (insn2) | |
215 // 3: LD 42 (insn1) | |
216 // RET 42 (insn0) | |
217 // 4: LD 42 (insn3) | |
218 // RET 42 (insn3+) | |
219 CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42); | |
220 CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0); | |
221 CodeGen::Node insn2 = insn1; // Implicit JUMP | |
222 | |
223 // We explicitly duplicate instructions to test that they're merged. | |
224 CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, | |
225 MakeInstruction(BPF_RET + BPF_K, 42)); | |
226 EXPECT_EQ(insn2, insn3); | |
227 | |
228 CodeGen::Node insn4 = | |
229 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3); | |
230 CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4); | |
231 | |
232 // Force a basic block that ends in neither a jump instruction nor a return | |
233 // instruction. It only contains "insn5". This exercises one of the less | |
234 // common code paths in the topo-sort algorithm. | |
235 // This also gives us a diamond-shaped pattern in our graph, which stresses | |
236 // another aspect of the topo-sort algorithm (namely, the ability to | |
237 // correctly count the incoming branches for subtrees that are not disjunct). | |
238 CodeGen::Node insn6 = | |
239 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4); | |
240 | |
241 RunTest(insn6); | |
242 } | |
243 | |
244 TEST_F(ProgramTest, ConfusingTails) { | |
245 // This simple program demonstrates https://crbug.com/351103/ | |
246 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could | |
247 // be tempted to merge them since they are the same. However, they are | |
248 // not mergeable because they fall-through to non semantically equivalent | |
249 // blocks. | |
250 // Without the fix for this bug, this program should trigger the check in | |
251 // CompileAndCompare: the serialized graphs from the program and its compiled | |
252 // version will differ. | |
253 // | |
254 // 0) LOAD 1 // ??? | |
255 // 1) if A == 0x1; then JMP 2 else JMP 3 | |
256 // 2) LOAD 0 // System call number | |
257 // 3) if A == 0x2; then JMP 4 else JMP 5 | |
258 // 4) LOAD 0 // System call number | |
259 // 5) if A == 0x1; then JMP 6 else JMP 7 | |
260 // 6) RET 0 | |
261 // 7) RET 1 | |
262 | |
263 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); | |
264 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); | |
265 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); | |
266 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); | |
267 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | |
268 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); | |
269 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | |
270 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | |
271 | |
272 RunTest(i0); | |
273 } | |
274 | |
275 TEST_F(ProgramTest, ConfusingTailsBasic) { | |
276 // Without the fix for https://crbug.com/351103/, (see | |
277 // SampleProgramConfusingTails()), this would generate a cyclic graph and | |
278 // crash as the two "LOAD 0" instructions would get merged. | |
279 // | |
280 // 0) LOAD 1 // ??? | |
281 // 1) if A == 0x1; then JMP 2 else JMP 3 | |
282 // 2) LOAD 0 // System call number | |
283 // 3) if A == 0x2; then JMP 4 else JMP 5 | |
284 // 4) LOAD 0 // System call number | |
285 // 5) RET 1 | |
286 | |
287 CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1); | |
288 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); | |
289 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | |
290 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); | |
291 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | |
292 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | |
293 | |
294 RunTest(i0); | |
295 } | |
296 | |
297 TEST_F(ProgramTest, ConfusingTailsMergeable) { | |
298 // This is similar to SampleProgramConfusingTails(), except that | |
299 // instructions 2 and 4 are now RET instructions. | |
300 // In PointerCompare(), this exercises the path where two blocks are of the | |
301 // same length and identical and the last instruction is a JMP or RET, so the | |
302 // following blocks don't need to be looked at and the blocks are mergeable. | |
303 // | |
304 // 0) LOAD 1 // ??? | |
305 // 1) if A == 0x1; then JMP 2 else JMP 3 | |
306 // 2) RET 42 | |
307 // 3) if A == 0x2; then JMP 4 else JMP 5 | |
308 // 4) RET 42 | |
309 // 5) if A == 0x1; then JMP 6 else JMP 7 | |
310 // 6) RET 0 | |
311 // 7) RET 1 | |
312 | |
313 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); | |
314 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); | |
315 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); | |
316 CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42); | |
317 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | |
318 CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42); | |
319 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | |
320 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | |
321 | |
322 RunTest(i0); | |
323 } | |
324 | |
325 TEST_F(ProgramTest, InstructionFolding) { | |
326 // Check that simple instructions are folded as expected. | |
327 CodeGen::Node a = MakeInstruction(BPF_RET + BPF_K, 0); | |
328 EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0)); | |
329 CodeGen::Node b = MakeInstruction(BPF_RET + BPF_K, 1); | |
330 EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0)); | |
331 EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1)); | |
332 EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1)); | |
333 | |
334 // Check that complex sequences are folded too. | |
335 CodeGen::Node c = | |
336 MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, | |
337 MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b)); | |
338 EXPECT_EQ(c, MakeInstruction( | |
339 BPF_LD + BPF_W + BPF_ABS, 0, | |
340 MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b))); | |
341 | |
342 RunTest(c); | |
343 } | |
344 | |
345 TEST_F(ProgramTest, FarBranches) { | |
346 // BPF instructions use 8-bit fields for branch offsets, which means | |
347 // branch targets must be within 255 instructions of the branch | |
348 // instruction. CodeGen abstracts away this detail by inserting jump | |
349 // instructions as needed, which we test here by generating programs | |
350 // that should trigger any interesting boundary conditions. | |
351 | |
352 // Populate with 260 initial instruction nodes. | |
353 std::vector<CodeGen::Node> nodes; | |
354 nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0)); | |
355 for (size_t i = 1; i < 260; ++i) { | |
356 nodes.push_back( | |
357 MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back())); | |
358 } | |
359 | |
360 // Exhaustively test branch offsets near BPF's limits. | |
361 for (size_t jt = 250; jt < 260; ++jt) { | |
362 for (size_t jf = 250; jf < 260; ++jf) { | |
363 nodes.push_back(MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, | |
364 nodes.rbegin()[jt], nodes.rbegin()[jf])); | |
365 RunTest(nodes.back()); | |
366 } | |
367 } | |
368 } | |
369 | |
370 TEST_F(ProgramTest, JumpReuse) { | |
371 // As a code size optimization, we try to reuse jumps when possible | |
372 // instead of emitting new ones. Here we make sure that optimization | |
373 // is working as intended. | |
374 // | |
375 // NOTE: To simplify testing, we rely on implementation details | |
376 // about what CodeGen::Node values indicate (i.e., vector indices), | |
377 // but CodeGen users should treat them as opaque values. | |
378 | |
379 // Populate with 260 initial instruction nodes. | |
380 std::vector<CodeGen::Node> nodes; | |
381 nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0)); | |
382 for (size_t i = 1; i < 260; ++i) { | |
383 nodes.push_back( | |
384 MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back())); | |
385 } | |
386 | |
387 // Branching to nodes[0] and nodes[1] should require 3 new | |
388 // instructions: two far jumps plus the branch itself. | |
389 CodeGen::Node one = | |
390 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, nodes[0], nodes[1]); | |
391 EXPECT_EQ(nodes.back() + 3, one); // XXX: Implementation detail! | |
392 RunTest(one); | |
393 | |
394 // Branching again to the same target nodes should require only one | |
395 // new instruction, as we can reuse the previous branch's jumps. | |
396 CodeGen::Node two = | |
397 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, nodes[0], nodes[1]); | |
398 EXPECT_EQ(one + 1, two); // XXX: Implementation detail! | |
399 RunTest(two); | |
400 } | |
401 | |
402 } // namespace | |
403 } // namespace sandbox | |
OLD | NEW |