| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "sandbox/linux/seccomp-bpf/codegen.h" | |
| 6 | |
| 7 #include <linux/filter.h> | |
| 8 | |
| 9 #include <map> | |
| 10 #include <utility> | |
| 11 #include <vector> | |
| 12 | |
| 13 #include "base/macros.h" | |
| 14 #include "base/md5.h" | |
| 15 #include "base/strings/string_piece.h" | |
| 16 #include "testing/gtest/include/gtest/gtest.h" | |
| 17 | |
| 18 namespace sandbox { | |
| 19 namespace { | |
| 20 | |
| 21 // Hash provides an abstraction for building "hash trees" from BPF | |
| 22 // control flow graphs, and efficiently identifying equivalent graphs. | |
| 23 // | |
| 24 // For simplicity, we use MD5, because base happens to provide a | |
| 25 // convenient API for its use. However, any collision-resistant hash | |
| 26 // should suffice. | |
| 27 class Hash { | |
| 28 public: | |
| 29 static const Hash kZero; | |
| 30 | |
| 31 Hash() : digest_() {} | |
| 32 | |
| 33 Hash(uint16_t code, | |
| 34 uint32_t k, | |
| 35 const Hash& jt = kZero, | |
| 36 const Hash& jf = kZero) | |
| 37 : digest_() { | |
| 38 base::MD5Context ctx; | |
| 39 base::MD5Init(&ctx); | |
| 40 HashValue(&ctx, code); | |
| 41 HashValue(&ctx, k); | |
| 42 HashValue(&ctx, jt); | |
| 43 HashValue(&ctx, jf); | |
| 44 base::MD5Final(&digest_, &ctx); | |
| 45 } | |
| 46 | |
| 47 Hash(const Hash& hash) = default; | |
| 48 Hash& operator=(const Hash& rhs) = default; | |
| 49 | |
| 50 friend bool operator==(const Hash& lhs, const Hash& rhs) { | |
| 51 return lhs.Base16() == rhs.Base16(); | |
| 52 } | |
| 53 friend bool operator!=(const Hash& lhs, const Hash& rhs) { | |
| 54 return !(lhs == rhs); | |
| 55 } | |
| 56 | |
| 57 private: | |
| 58 template <typename T> | |
| 59 void HashValue(base::MD5Context* ctx, const T& value) { | |
| 60 base::MD5Update(ctx, | |
| 61 base::StringPiece(reinterpret_cast<const char*>(&value), | |
| 62 sizeof(value))); | |
| 63 } | |
| 64 | |
| 65 std::string Base16() const { | |
| 66 return MD5DigestToBase16(digest_); | |
| 67 } | |
| 68 | |
| 69 base::MD5Digest digest_; | |
| 70 }; | |
| 71 | |
| 72 const Hash Hash::kZero; | |
| 73 | |
| 74 // Sanity check that equality and inequality work on Hash as required. | |
| 75 TEST(CodeGen, HashSanity) { | |
| 76 std::vector<Hash> hashes; | |
| 77 | |
| 78 // Push a bunch of logically distinct hashes. | |
| 79 hashes.push_back(Hash::kZero); | |
| 80 for (int i = 0; i < 4; ++i) { | |
| 81 hashes.push_back(Hash(i & 1, i & 2)); | |
| 82 } | |
| 83 for (int i = 0; i < 16; ++i) { | |
| 84 hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8))); | |
| 85 } | |
| 86 for (int i = 0; i < 64; ++i) { | |
| 87 hashes.push_back( | |
| 88 Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32))); | |
| 89 } | |
| 90 | |
| 91 for (const Hash& a : hashes) { | |
| 92 for (const Hash& b : hashes) { | |
| 93 // Hashes should equal themselves, but not equal all others. | |
| 94 if (&a == &b) { | |
| 95 EXPECT_EQ(a, b); | |
| 96 } else { | |
| 97 EXPECT_NE(a, b); | |
| 98 } | |
| 99 } | |
| 100 } | |
| 101 } | |
| 102 | |
| 103 // ProgramTest provides a fixture for writing compiling sample | |
| 104 // programs with CodeGen and verifying the linearized output matches | |
| 105 // the input DAG. | |
| 106 class ProgramTest : public ::testing::Test { | |
| 107 protected: | |
| 108 ProgramTest() : gen_(), node_hashes_() {} | |
| 109 | |
| 110 // MakeInstruction calls CodeGen::MakeInstruction() and associated | |
| 111 // the returned address with a hash of the instruction. | |
| 112 CodeGen::Node MakeInstruction(uint16_t code, | |
| 113 uint32_t k, | |
| 114 CodeGen::Node jt = CodeGen::kNullNode, | |
| 115 CodeGen::Node jf = CodeGen::kNullNode) { | |
| 116 CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf); | |
| 117 EXPECT_NE(CodeGen::kNullNode, res); | |
| 118 | |
| 119 Hash digest(code, k, Lookup(jt), Lookup(jf)); | |
| 120 auto it = node_hashes_.insert(std::make_pair(res, digest)); | |
| 121 EXPECT_EQ(digest, it.first->second); | |
| 122 | |
| 123 return res; | |
| 124 } | |
| 125 | |
| 126 // RunTest compiles the program and verifies that the output matches | |
| 127 // what is expected. It should be called at the end of each program | |
| 128 // test case. | |
| 129 void RunTest(CodeGen::Node head) { | |
| 130 // Compile the program | |
| 131 CodeGen::Program program; | |
| 132 gen_.Compile(head, &program); | |
| 133 | |
| 134 // Walk the program backwards, and compute the hash for each instruction. | |
| 135 std::vector<Hash> prog_hashes(program.size()); | |
| 136 for (size_t i = program.size(); i > 0; --i) { | |
| 137 const sock_filter& insn = program.at(i - 1); | |
| 138 Hash& hash = prog_hashes.at(i - 1); | |
| 139 | |
| 140 if (BPF_CLASS(insn.code) == BPF_JMP) { | |
| 141 if (BPF_OP(insn.code) == BPF_JA) { | |
| 142 // The compiler adds JA instructions as needed, so skip them. | |
| 143 hash = prog_hashes.at(i + insn.k); | |
| 144 } else { | |
| 145 hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt), | |
| 146 prog_hashes.at(i + insn.jf)); | |
| 147 } | |
| 148 } else if (BPF_CLASS(insn.code) == BPF_RET) { | |
| 149 hash = Hash(insn.code, insn.k); | |
| 150 } else { | |
| 151 hash = Hash(insn.code, insn.k, prog_hashes.at(i)); | |
| 152 } | |
| 153 } | |
| 154 | |
| 155 EXPECT_EQ(Lookup(head), prog_hashes.at(0)); | |
| 156 } | |
| 157 | |
| 158 private: | |
| 159 const Hash& Lookup(CodeGen::Node next) const { | |
| 160 if (next == CodeGen::kNullNode) { | |
| 161 return Hash::kZero; | |
| 162 } | |
| 163 auto it = node_hashes_.find(next); | |
| 164 if (it == node_hashes_.end()) { | |
| 165 ADD_FAILURE() << "No hash found for node " << next; | |
| 166 return Hash::kZero; | |
| 167 } | |
| 168 return it->second; | |
| 169 } | |
| 170 | |
| 171 CodeGen gen_; | |
| 172 std::map<CodeGen::Node, Hash> node_hashes_; | |
| 173 | |
| 174 DISALLOW_COPY_AND_ASSIGN(ProgramTest); | |
| 175 }; | |
| 176 | |
| 177 TEST_F(ProgramTest, OneInstruction) { | |
| 178 // Create the most basic valid BPF program: | |
| 179 // RET 0 | |
| 180 CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0); | |
| 181 RunTest(head); | |
| 182 } | |
| 183 | |
| 184 TEST_F(ProgramTest, SimpleBranch) { | |
| 185 // Create a program with a single branch: | |
| 186 // JUMP if eq 42 then $0 else $1 | |
| 187 // 0: RET 1 | |
| 188 // 1: RET 0 | |
| 189 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, | |
| 190 MakeInstruction(BPF_RET + BPF_K, 1), | |
| 191 MakeInstruction(BPF_RET + BPF_K, 0)); | |
| 192 RunTest(head); | |
| 193 } | |
| 194 | |
| 195 TEST_F(ProgramTest, AtypicalBranch) { | |
| 196 // Create a program with a single branch: | |
| 197 // JUMP if eq 42 then $0 else $0 | |
| 198 // 0: RET 0 | |
| 199 | |
| 200 CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0); | |
| 201 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret); | |
| 202 | |
| 203 // N.B.: As the instructions in both sides of the branch are already | |
| 204 // the same object, we do not actually have any "mergeable" branches. | |
| 205 // This needs to be reflected in our choice of "flags". | |
| 206 RunTest(head); | |
| 207 } | |
| 208 | |
| 209 TEST_F(ProgramTest, Complex) { | |
| 210 // Creates a basic BPF program that we'll use to test some of the code: | |
| 211 // JUMP if eq 42 the $0 else $1 (insn6) | |
| 212 // 0: LD 23 (insn5) | |
| 213 // 1: JUMP if eq 42 then $2 else $4 (insn4) | |
| 214 // 2: JUMP to $3 (insn2) | |
| 215 // 3: LD 42 (insn1) | |
| 216 // RET 42 (insn0) | |
| 217 // 4: LD 42 (insn3) | |
| 218 // RET 42 (insn3+) | |
| 219 CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42); | |
| 220 CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0); | |
| 221 CodeGen::Node insn2 = insn1; // Implicit JUMP | |
| 222 | |
| 223 // We explicitly duplicate instructions to test that they're merged. | |
| 224 CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, | |
| 225 MakeInstruction(BPF_RET + BPF_K, 42)); | |
| 226 EXPECT_EQ(insn2, insn3); | |
| 227 | |
| 228 CodeGen::Node insn4 = | |
| 229 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3); | |
| 230 CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4); | |
| 231 | |
| 232 // Force a basic block that ends in neither a jump instruction nor a return | |
| 233 // instruction. It only contains "insn5". This exercises one of the less | |
| 234 // common code paths in the topo-sort algorithm. | |
| 235 // This also gives us a diamond-shaped pattern in our graph, which stresses | |
| 236 // another aspect of the topo-sort algorithm (namely, the ability to | |
| 237 // correctly count the incoming branches for subtrees that are not disjunct). | |
| 238 CodeGen::Node insn6 = | |
| 239 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4); | |
| 240 | |
| 241 RunTest(insn6); | |
| 242 } | |
| 243 | |
| 244 TEST_F(ProgramTest, ConfusingTails) { | |
| 245 // This simple program demonstrates https://crbug.com/351103/ | |
| 246 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could | |
| 247 // be tempted to merge them since they are the same. However, they are | |
| 248 // not mergeable because they fall-through to non semantically equivalent | |
| 249 // blocks. | |
| 250 // Without the fix for this bug, this program should trigger the check in | |
| 251 // CompileAndCompare: the serialized graphs from the program and its compiled | |
| 252 // version will differ. | |
| 253 // | |
| 254 // 0) LOAD 1 // ??? | |
| 255 // 1) if A == 0x1; then JMP 2 else JMP 3 | |
| 256 // 2) LOAD 0 // System call number | |
| 257 // 3) if A == 0x2; then JMP 4 else JMP 5 | |
| 258 // 4) LOAD 0 // System call number | |
| 259 // 5) if A == 0x1; then JMP 6 else JMP 7 | |
| 260 // 6) RET 0 | |
| 261 // 7) RET 1 | |
| 262 | |
| 263 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); | |
| 264 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); | |
| 265 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); | |
| 266 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); | |
| 267 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | |
| 268 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); | |
| 269 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | |
| 270 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | |
| 271 | |
| 272 RunTest(i0); | |
| 273 } | |
| 274 | |
| 275 TEST_F(ProgramTest, ConfusingTailsBasic) { | |
| 276 // Without the fix for https://crbug.com/351103/, (see | |
| 277 // SampleProgramConfusingTails()), this would generate a cyclic graph and | |
| 278 // crash as the two "LOAD 0" instructions would get merged. | |
| 279 // | |
| 280 // 0) LOAD 1 // ??? | |
| 281 // 1) if A == 0x1; then JMP 2 else JMP 3 | |
| 282 // 2) LOAD 0 // System call number | |
| 283 // 3) if A == 0x2; then JMP 4 else JMP 5 | |
| 284 // 4) LOAD 0 // System call number | |
| 285 // 5) RET 1 | |
| 286 | |
| 287 CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1); | |
| 288 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); | |
| 289 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | |
| 290 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); | |
| 291 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | |
| 292 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | |
| 293 | |
| 294 RunTest(i0); | |
| 295 } | |
| 296 | |
| 297 TEST_F(ProgramTest, ConfusingTailsMergeable) { | |
| 298 // This is similar to SampleProgramConfusingTails(), except that | |
| 299 // instructions 2 and 4 are now RET instructions. | |
| 300 // In PointerCompare(), this exercises the path where two blocks are of the | |
| 301 // same length and identical and the last instruction is a JMP or RET, so the | |
| 302 // following blocks don't need to be looked at and the blocks are mergeable. | |
| 303 // | |
| 304 // 0) LOAD 1 // ??? | |
| 305 // 1) if A == 0x1; then JMP 2 else JMP 3 | |
| 306 // 2) RET 42 | |
| 307 // 3) if A == 0x2; then JMP 4 else JMP 5 | |
| 308 // 4) RET 42 | |
| 309 // 5) if A == 0x1; then JMP 6 else JMP 7 | |
| 310 // 6) RET 0 | |
| 311 // 7) RET 1 | |
| 312 | |
| 313 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); | |
| 314 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); | |
| 315 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); | |
| 316 CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42); | |
| 317 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | |
| 318 CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42); | |
| 319 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | |
| 320 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | |
| 321 | |
| 322 RunTest(i0); | |
| 323 } | |
| 324 | |
| 325 TEST_F(ProgramTest, InstructionFolding) { | |
| 326 // Check that simple instructions are folded as expected. | |
| 327 CodeGen::Node a = MakeInstruction(BPF_RET + BPF_K, 0); | |
| 328 EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0)); | |
| 329 CodeGen::Node b = MakeInstruction(BPF_RET + BPF_K, 1); | |
| 330 EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0)); | |
| 331 EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1)); | |
| 332 EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1)); | |
| 333 | |
| 334 // Check that complex sequences are folded too. | |
| 335 CodeGen::Node c = | |
| 336 MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, | |
| 337 MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b)); | |
| 338 EXPECT_EQ(c, MakeInstruction( | |
| 339 BPF_LD + BPF_W + BPF_ABS, 0, | |
| 340 MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b))); | |
| 341 | |
| 342 RunTest(c); | |
| 343 } | |
| 344 | |
| 345 TEST_F(ProgramTest, FarBranches) { | |
| 346 // BPF instructions use 8-bit fields for branch offsets, which means | |
| 347 // branch targets must be within 255 instructions of the branch | |
| 348 // instruction. CodeGen abstracts away this detail by inserting jump | |
| 349 // instructions as needed, which we test here by generating programs | |
| 350 // that should trigger any interesting boundary conditions. | |
| 351 | |
| 352 // Populate with 260 initial instruction nodes. | |
| 353 std::vector<CodeGen::Node> nodes; | |
| 354 nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0)); | |
| 355 for (size_t i = 1; i < 260; ++i) { | |
| 356 nodes.push_back( | |
| 357 MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back())); | |
| 358 } | |
| 359 | |
| 360 // Exhaustively test branch offsets near BPF's limits. | |
| 361 for (size_t jt = 250; jt < 260; ++jt) { | |
| 362 for (size_t jf = 250; jf < 260; ++jf) { | |
| 363 nodes.push_back(MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, | |
| 364 nodes.rbegin()[jt], nodes.rbegin()[jf])); | |
| 365 RunTest(nodes.back()); | |
| 366 } | |
| 367 } | |
| 368 } | |
| 369 | |
| 370 TEST_F(ProgramTest, JumpReuse) { | |
| 371 // As a code size optimization, we try to reuse jumps when possible | |
| 372 // instead of emitting new ones. Here we make sure that optimization | |
| 373 // is working as intended. | |
| 374 // | |
| 375 // NOTE: To simplify testing, we rely on implementation details | |
| 376 // about what CodeGen::Node values indicate (i.e., vector indices), | |
| 377 // but CodeGen users should treat them as opaque values. | |
| 378 | |
| 379 // Populate with 260 initial instruction nodes. | |
| 380 std::vector<CodeGen::Node> nodes; | |
| 381 nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0)); | |
| 382 for (size_t i = 1; i < 260; ++i) { | |
| 383 nodes.push_back( | |
| 384 MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back())); | |
| 385 } | |
| 386 | |
| 387 // Branching to nodes[0] and nodes[1] should require 3 new | |
| 388 // instructions: two far jumps plus the branch itself. | |
| 389 CodeGen::Node one = | |
| 390 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, nodes[0], nodes[1]); | |
| 391 EXPECT_EQ(nodes.back() + 3, one); // XXX: Implementation detail! | |
| 392 RunTest(one); | |
| 393 | |
| 394 // Branching again to the same target nodes should require only one | |
| 395 // new instruction, as we can reuse the previous branch's jumps. | |
| 396 CodeGen::Node two = | |
| 397 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, nodes[0], nodes[1]); | |
| 398 EXPECT_EQ(one + 1, two); // XXX: Implementation detail! | |
| 399 RunTest(two); | |
| 400 } | |
| 401 | |
| 402 } // namespace | |
| 403 } // namespace sandbox | |
| OLD | NEW |