| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "sandbox/linux/seccomp-bpf/codegen.h" | 5 #include "sandbox/linux/seccomp-bpf/codegen.h" |
| 6 | 6 |
| 7 #include <linux/filter.h> | 7 #include <linux/filter.h> |
| 8 | 8 |
| 9 #include <set> | 9 #include <map> |
| 10 #include <string> | 10 #include <utility> |
| 11 #include <vector> | 11 #include <vector> |
| 12 | 12 |
| 13 #include "sandbox/linux/seccomp-bpf/basicblock.h" | 13 #include "base/macros.h" |
| 14 #include "sandbox/linux/seccomp-bpf/errorcode.h" | 14 #include "base/md5.h" |
| 15 #include "sandbox/linux/seccomp-bpf/instruction.h" | 15 #include "base/strings/string_piece.h" |
| 16 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
| 17 | 17 |
| 18 namespace sandbox { | 18 namespace sandbox { |
| 19 | |
| 20 // We want to access some of the private methods in the code generator. We | |
| 21 // do so by defining a "friend" that makes these methods public for us. | |
| 22 class CodeGenUnittestHelper : public CodeGen { | |
| 23 public: | |
| 24 using CodeGen::CutGraphIntoBasicBlocks; | |
| 25 using CodeGen::FindBranchTargets; | |
| 26 using CodeGen::MergeTails; | |
| 27 }; | |
| 28 | |
| 29 namespace { | 19 namespace { |
| 30 | 20 |
| 31 enum { | 21 // Hash provides an abstraction for building "hash trees" from BPF |
| 32 NO_FLAGS = 0x0000, | 22 // control flow graphs, and efficiently identifying equivalent graphs. |
| 33 HAS_MERGEABLE_TAILS = 0x0001, | 23 // |
| 34 }; | 24 // For simplicity, we use MD5, because base happens to provide a |
| 25 // convenient API for its use. However, any collision-resistant hash |
| 26 // should suffice. |
| 27 class Hash { |
| 28 public: |
| 29 static const Hash kZero; |
| 35 | 30 |
| 36 using ProgramTestFunc = void (*)(CodeGenUnittestHelper* gen, | 31 Hash() : digest_() {} |
| 37 CodeGen::Node head, | |
| 38 int flags); | |
| 39 | 32 |
| 40 class ProgramTest : public ::testing::TestWithParam<ProgramTestFunc> { | 33 Hash(uint16_t code, |
| 41 protected: | 34 uint32_t k, |
| 42 ProgramTest() : gen_() {} | 35 const Hash& jt = kZero, |
| 43 | 36 const Hash& jf = kZero) |
| 44 // RunTest runs the test function argument. It should be called at | 37 : digest_() { |
| 45 // the end of each program test case. | 38 base::MD5Context ctx; |
| 46 void RunTest(CodeGen::Node head, int flags) { | 39 base::MD5Init(&ctx); |
| 47 GetParam()(&gen_, head, flags); | 40 HashValue(&ctx, code); |
| 41 HashValue(&ctx, k); |
| 42 HashValue(&ctx, jt); |
| 43 HashValue(&ctx, jf); |
| 44 base::MD5Final(&digest_, &ctx); |
| 48 } | 45 } |
| 49 | 46 |
| 50 CodeGen::Node MakeInstruction(uint16_t code, | 47 Hash(const Hash& hash) = default; |
| 51 uint32_t k, | 48 Hash& operator=(const Hash& rhs) = default; |
| 52 CodeGen::Node jt = nullptr, | 49 |
| 53 CodeGen::Node jf = nullptr) { | 50 friend bool operator==(const Hash& lhs, const Hash& rhs) { |
| 54 CodeGen::Node ret = gen_.MakeInstruction(code, k, jt, jf); | 51 return lhs.Base16() == rhs.Base16(); |
| 55 EXPECT_NE(nullptr, ret); | 52 } |
| 56 EXPECT_EQ(code, ret->code); | 53 friend bool operator!=(const Hash& lhs, const Hash& rhs) { |
| 57 EXPECT_EQ(k, ret->k); | 54 return !(lhs == rhs); |
| 58 if (BPF_CLASS(code) == BPF_JMP) { | |
| 59 EXPECT_EQ(nullptr, ret->next); | |
| 60 EXPECT_EQ(jt, ret->jt_ptr); | |
| 61 EXPECT_EQ(jf, ret->jf_ptr); | |
| 62 } else { | |
| 63 EXPECT_EQ(jt, ret->next); | |
| 64 EXPECT_EQ(nullptr, ret->jt_ptr); | |
| 65 EXPECT_EQ(nullptr, ret->jf_ptr); | |
| 66 } | |
| 67 return ret; | |
| 68 } | 55 } |
| 69 | 56 |
| 70 private: | 57 private: |
| 71 CodeGenUnittestHelper gen_; | 58 template <typename T> |
| 59 void HashValue(base::MD5Context* ctx, const T& value) { |
| 60 base::MD5Update(ctx, |
| 61 base::StringPiece(reinterpret_cast<const char*>(&value), |
| 62 sizeof(value))); |
| 63 } |
| 64 |
| 65 std::string Base16() const { |
| 66 return MD5DigestToBase16(digest_); |
| 67 } |
| 68 |
| 69 base::MD5Digest digest_; |
| 72 }; | 70 }; |
| 73 | 71 |
| 74 TEST_P(ProgramTest, OneInstruction) { | 72 const Hash Hash::kZero; |
| 73 |
| 74 // Sanity check that equality and inequality work on Hash as required. |
| 75 TEST(CodeGen, HashSanity) { |
| 76 std::vector<Hash> hashes; |
| 77 |
| 78 // Push a bunch of logically distinct hashes. |
| 79 hashes.push_back(Hash::kZero); |
| 80 for (int i = 0; i < 4; ++i) { |
| 81 hashes.push_back(Hash(i & 1, i & 2)); |
| 82 } |
| 83 for (int i = 0; i < 16; ++i) { |
| 84 hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8))); |
| 85 } |
| 86 for (int i = 0; i < 64; ++i) { |
| 87 hashes.push_back( |
| 88 Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32))); |
| 89 } |
| 90 |
| 91 for (const Hash& a : hashes) { |
| 92 for (const Hash& b : hashes) { |
| 93 // Hashes should equal themselves, but not equal all others. |
| 94 if (&a == &b) { |
| 95 EXPECT_EQ(a, b); |
| 96 } else { |
| 97 EXPECT_NE(a, b); |
| 98 } |
| 99 } |
| 100 } |
| 101 } |
| 102 |
| 103 // ProgramTest provides a fixture for writing compiling sample |
| 104 // programs with CodeGen and verifying the linearized output matches |
| 105 // the input DAG. |
| 106 class ProgramTest : public ::testing::Test { |
| 107 protected: |
| 108 ProgramTest() : gen_(), node_hashes_() {} |
| 109 |
| 110 // MakeInstruction calls CodeGen::MakeInstruction() and associated |
| 111 // the returned address with a hash of the instruction. |
| 112 CodeGen::Node MakeInstruction(uint16_t code, |
| 113 uint32_t k, |
| 114 CodeGen::Node jt = CodeGen::kNullNode, |
| 115 CodeGen::Node jf = CodeGen::kNullNode) { |
| 116 CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf); |
| 117 EXPECT_NE(CodeGen::kNullNode, res); |
| 118 |
| 119 Hash digest; |
| 120 if (code == BPF_JMP + BPF_JA) { |
| 121 // TODO(mdempsky): Disallow use of JA. |
| 122 digest = Lookup(jt); |
| 123 } else { |
| 124 digest = Hash(code, k, Lookup(jt), Lookup(jf)); |
| 125 } |
| 126 auto it = node_hashes_.insert(std::make_pair(res, digest)); |
| 127 EXPECT_EQ(digest, it.first->second); |
| 128 |
| 129 return res; |
| 130 } |
| 131 |
| 132 // RunTest compiles the program and verifies that the output matches |
| 133 // what is expected. It should be called at the end of each program |
| 134 // test case. |
| 135 void RunTest(CodeGen::Node head) { |
| 136 // Compile the program |
| 137 CodeGen::Program program; |
| 138 gen_.Compile(head, &program); |
| 139 |
| 140 // Walk the program backwards, and compute the hash for each instruction. |
| 141 std::vector<Hash> prog_hashes(program.size()); |
| 142 for (size_t i = program.size(); i > 0; --i) { |
| 143 const sock_filter& insn = program.at(i - 1); |
| 144 Hash& hash = prog_hashes.at(i - 1); |
| 145 |
| 146 if (BPF_CLASS(insn.code) == BPF_JMP) { |
| 147 if (BPF_OP(insn.code) == BPF_JA) { |
| 148 // The compiler adds JA instructions as needed, so skip them. |
| 149 hash = prog_hashes.at(i + insn.k); |
| 150 } else { |
| 151 hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt), |
| 152 prog_hashes.at(i + insn.jf)); |
| 153 } |
| 154 } else if (BPF_CLASS(insn.code) == BPF_RET) { |
| 155 hash = Hash(insn.code, insn.k); |
| 156 } else { |
| 157 hash = Hash(insn.code, insn.k, prog_hashes.at(i)); |
| 158 } |
| 159 } |
| 160 |
| 161 EXPECT_EQ(Lookup(head), prog_hashes.at(0)); |
| 162 } |
| 163 |
| 164 private: |
| 165 const Hash& Lookup(CodeGen::Node next) const { |
| 166 if (next == CodeGen::kNullNode) { |
| 167 return Hash::kZero; |
| 168 } |
| 169 auto it = node_hashes_.find(next); |
| 170 if (it == node_hashes_.end()) { |
| 171 ADD_FAILURE() << "No hash found for node " << next; |
| 172 return Hash::kZero; |
| 173 } |
| 174 return it->second; |
| 175 } |
| 176 |
| 177 CodeGen gen_; |
| 178 std::map<CodeGen::Node, Hash> node_hashes_; |
| 179 |
| 180 DISALLOW_COPY_AND_ASSIGN(ProgramTest); |
| 181 }; |
| 182 |
| 183 TEST_F(ProgramTest, OneInstruction) { |
| 75 // Create the most basic valid BPF program: | 184 // Create the most basic valid BPF program: |
| 76 // RET 0 | 185 // RET 0 |
| 77 CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0); | 186 CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0); |
| 78 RunTest(head, NO_FLAGS); | 187 RunTest(head); |
| 79 } | 188 } |
| 80 | 189 |
| 81 TEST_P(ProgramTest, SimpleBranch) { | 190 TEST_F(ProgramTest, SimpleBranch) { |
| 82 // Create a program with a single branch: | 191 // Create a program with a single branch: |
| 83 // JUMP if eq 42 then $0 else $1 | 192 // JUMP if eq 42 then $0 else $1 |
| 84 // 0: RET 1 | 193 // 0: RET 1 |
| 85 // 1: RET 0 | 194 // 1: RET 0 |
| 86 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, | 195 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, |
| 87 MakeInstruction(BPF_RET + BPF_K, 1), | 196 MakeInstruction(BPF_RET + BPF_K, 1), |
| 88 MakeInstruction(BPF_RET + BPF_K, 0)); | 197 MakeInstruction(BPF_RET + BPF_K, 0)); |
| 89 RunTest(head, NO_FLAGS); | 198 RunTest(head); |
| 90 } | 199 } |
| 91 | 200 |
| 92 TEST_P(ProgramTest, AtypicalBranch) { | 201 TEST_F(ProgramTest, AtypicalBranch) { |
| 93 // Create a program with a single branch: | 202 // Create a program with a single branch: |
| 94 // JUMP if eq 42 then $0 else $0 | 203 // JUMP if eq 42 then $0 else $0 |
| 95 // 0: RET 0 | 204 // 0: RET 0 |
| 96 | 205 |
| 97 CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0); | 206 CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0); |
| 98 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret); | 207 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret); |
| 99 | 208 |
| 100 // N.B.: As the instructions in both sides of the branch are already | 209 // N.B.: As the instructions in both sides of the branch are already |
| 101 // the same object, we do not actually have any "mergeable" branches. | 210 // the same object, we do not actually have any "mergeable" branches. |
| 102 // This needs to be reflected in our choice of "flags". | 211 // This needs to be reflected in our choice of "flags". |
| 103 RunTest(head, NO_FLAGS); | 212 RunTest(head); |
| 104 } | 213 } |
| 105 | 214 |
| 106 TEST_P(ProgramTest, Complex) { | 215 TEST_F(ProgramTest, Complex) { |
| 107 // Creates a basic BPF program that we'll use to test some of the code: | 216 // Creates a basic BPF program that we'll use to test some of the code: |
| 108 // JUMP if eq 42 the $0 else $1 (insn6) | 217 // JUMP if eq 42 the $0 else $1 (insn6) |
| 109 // 0: LD 23 (insn5) | 218 // 0: LD 23 (insn5) |
| 110 // 1: JUMP if eq 42 then $2 else $4 (insn4) | 219 // 1: JUMP if eq 42 then $2 else $4 (insn4) |
| 111 // 2: JUMP to $3 (insn2) | 220 // 2: JUMP to $3 (insn2) |
| 112 // 3: LD 42 (insn1) | 221 // 3: LD 42 (insn1) |
| 113 // RET 42 (insn0) | 222 // RET 42 (insn0) |
| 114 // 4: LD 42 (insn3) | 223 // 4: LD 42 (insn3) |
| 115 // RET 42 (insn3+) | 224 // RET 42 (insn3+) |
| 116 CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42); | 225 CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42); |
| (...skipping 11 matching lines...) Expand all Loading... |
| 128 | 237 |
| 129 // Force a basic block that ends in neither a jump instruction nor a return | 238 // Force a basic block that ends in neither a jump instruction nor a return |
| 130 // instruction. It only contains "insn5". This exercises one of the less | 239 // instruction. It only contains "insn5". This exercises one of the less |
| 131 // common code paths in the topo-sort algorithm. | 240 // common code paths in the topo-sort algorithm. |
| 132 // This also gives us a diamond-shaped pattern in our graph, which stresses | 241 // This also gives us a diamond-shaped pattern in our graph, which stresses |
| 133 // another aspect of the topo-sort algorithm (namely, the ability to | 242 // another aspect of the topo-sort algorithm (namely, the ability to |
| 134 // correctly count the incoming branches for subtrees that are not disjunct). | 243 // correctly count the incoming branches for subtrees that are not disjunct). |
| 135 CodeGen::Node insn6 = | 244 CodeGen::Node insn6 = |
| 136 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4); | 245 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4); |
| 137 | 246 |
| 138 RunTest(insn6, HAS_MERGEABLE_TAILS); | 247 RunTest(insn6); |
| 139 } | 248 } |
| 140 | 249 |
| 141 TEST_P(ProgramTest, ConfusingTails) { | 250 TEST_F(ProgramTest, ConfusingTails) { |
| 142 // This simple program demonstrates https://crbug.com/351103/ | 251 // This simple program demonstrates https://crbug.com/351103/ |
| 143 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could | 252 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could |
| 144 // be tempted to merge them since they are the same. However, they are | 253 // be tempted to merge them since they are the same. However, they are |
| 145 // not mergeable because they fall-through to non semantically equivalent | 254 // not mergeable because they fall-through to non semantically equivalent |
| 146 // blocks. | 255 // blocks. |
| 147 // Without the fix for this bug, this program should trigger the check in | 256 // Without the fix for this bug, this program should trigger the check in |
| 148 // CompileAndCompare: the serialized graphs from the program and its compiled | 257 // CompileAndCompare: the serialized graphs from the program and its compiled |
| 149 // version will differ. | 258 // version will differ. |
| 150 // | 259 // |
| 151 // 0) LOAD 1 // ??? | 260 // 0) LOAD 1 // ??? |
| 152 // 1) if A == 0x1; then JMP 2 else JMP 3 | 261 // 1) if A == 0x1; then JMP 2 else JMP 3 |
| 153 // 2) LOAD 0 // System call number | 262 // 2) LOAD 0 // System call number |
| 154 // 3) if A == 0x2; then JMP 4 else JMP 5 | 263 // 3) if A == 0x2; then JMP 4 else JMP 5 |
| 155 // 4) LOAD 0 // System call number | 264 // 4) LOAD 0 // System call number |
| 156 // 5) if A == 0x1; then JMP 6 else JMP 7 | 265 // 5) if A == 0x1; then JMP 6 else JMP 7 |
| 157 // 6) RET 0 | 266 // 6) RET 0 |
| 158 // 7) RET 1 | 267 // 7) RET 1 |
| 159 | 268 |
| 160 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); | 269 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); |
| 161 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); | 270 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); |
| 162 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); | 271 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); |
| 163 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); | 272 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); |
| 164 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | 273 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); |
| 165 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); | 274 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); |
| 166 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | 275 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); |
| 167 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | 276 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); |
| 168 | 277 |
| 169 RunTest(i0, NO_FLAGS); | 278 RunTest(i0); |
| 170 } | 279 } |
| 171 | 280 |
| 172 TEST_P(ProgramTest, ConfusingTailsBasic) { | 281 TEST_F(ProgramTest, ConfusingTailsBasic) { |
| 173 // Without the fix for https://crbug.com/351103/, (see | 282 // Without the fix for https://crbug.com/351103/, (see |
| 174 // SampleProgramConfusingTails()), this would generate a cyclic graph and | 283 // SampleProgramConfusingTails()), this would generate a cyclic graph and |
| 175 // crash as the two "LOAD 0" instructions would get merged. | 284 // crash as the two "LOAD 0" instructions would get merged. |
| 176 // | 285 // |
| 177 // 0) LOAD 1 // ??? | 286 // 0) LOAD 1 // ??? |
| 178 // 1) if A == 0x1; then JMP 2 else JMP 3 | 287 // 1) if A == 0x1; then JMP 2 else JMP 3 |
| 179 // 2) LOAD 0 // System call number | 288 // 2) LOAD 0 // System call number |
| 180 // 3) if A == 0x2; then JMP 4 else JMP 5 | 289 // 3) if A == 0x2; then JMP 4 else JMP 5 |
| 181 // 4) LOAD 0 // System call number | 290 // 4) LOAD 0 // System call number |
| 182 // 5) RET 1 | 291 // 5) RET 1 |
| 183 | 292 |
| 184 CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1); | 293 CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1); |
| 185 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); | 294 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); |
| 186 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | 295 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); |
| 187 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); | 296 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); |
| 188 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | 297 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); |
| 189 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | 298 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); |
| 190 | 299 |
| 191 RunTest(i0, NO_FLAGS); | 300 RunTest(i0); |
| 192 } | 301 } |
| 193 | 302 |
| 194 TEST_P(ProgramTest, ConfusingTailsMergeable) { | 303 TEST_F(ProgramTest, ConfusingTailsMergeable) { |
| 195 // This is similar to SampleProgramConfusingTails(), except that | 304 // This is similar to SampleProgramConfusingTails(), except that |
| 196 // instructions 2 and 4 are now RET instructions. | 305 // instructions 2 and 4 are now RET instructions. |
| 197 // In PointerCompare(), this exercises the path where two blocks are of the | 306 // In PointerCompare(), this exercises the path where two blocks are of the |
| 198 // same length and identical and the last instruction is a JMP or RET, so the | 307 // same length and identical and the last instruction is a JMP or RET, so the |
| 199 // following blocks don't need to be looked at and the blocks are mergeable. | 308 // following blocks don't need to be looked at and the blocks are mergeable. |
| 200 // | 309 // |
| 201 // 0) LOAD 1 // ??? | 310 // 0) LOAD 1 // ??? |
| 202 // 1) if A == 0x1; then JMP 2 else JMP 3 | 311 // 1) if A == 0x1; then JMP 2 else JMP 3 |
| 203 // 2) RET 42 | 312 // 2) RET 42 |
| 204 // 3) if A == 0x2; then JMP 4 else JMP 5 | 313 // 3) if A == 0x2; then JMP 4 else JMP 5 |
| 205 // 4) RET 42 | 314 // 4) RET 42 |
| 206 // 5) if A == 0x1; then JMP 6 else JMP 7 | 315 // 5) if A == 0x1; then JMP 6 else JMP 7 |
| 207 // 6) RET 0 | 316 // 6) RET 0 |
| 208 // 7) RET 1 | 317 // 7) RET 1 |
| 209 | 318 |
| 210 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); | 319 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); |
| 211 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); | 320 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); |
| 212 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); | 321 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); |
| 213 CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42); | 322 CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42); |
| 214 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | 323 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); |
| 215 CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42); | 324 CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42); |
| 216 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | 325 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); |
| 217 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | 326 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); |
| 218 | 327 |
| 219 RunTest(i0, HAS_MERGEABLE_TAILS); | 328 RunTest(i0); |
| 220 } | 329 } |
| 221 | 330 |
| 222 void MakeInstruction(CodeGenUnittestHelper* codegen, | |
| 223 CodeGen::Node program, | |
| 224 int) { | |
| 225 // Nothing to do here | |
| 226 } | |
| 227 | |
| 228 void FindBranchTargets(CodeGenUnittestHelper* codegen, CodeGen::Node prg, int) { | |
| 229 BranchTargets branch_targets; | |
| 230 codegen->FindBranchTargets(*prg, &branch_targets); | |
| 231 | |
| 232 // Verifying the general properties that should be true for every | |
| 233 // well-formed BPF program. | |
| 234 // Perform a depth-first traversal of the BPF program an verify that all | |
| 235 // targets of BPF_JMP instructions are represented in the "branch_targets". | |
| 236 // At the same time, compute a set of both the branch targets and all the | |
| 237 // instructions in the program. | |
| 238 std::vector<CodeGen::Node> stack; | |
| 239 std::set<CodeGen::Node> all_instructions; | |
| 240 std::set<CodeGen::Node> target_instructions; | |
| 241 BranchTargets::const_iterator end = branch_targets.end(); | |
| 242 for (CodeGen::Node insn = prg;;) { | |
| 243 all_instructions.insert(insn); | |
| 244 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
| 245 target_instructions.insert(insn->jt_ptr); | |
| 246 ASSERT_TRUE(insn->jt_ptr != NULL); | |
| 247 ASSERT_TRUE(branch_targets.find(insn->jt_ptr) != end); | |
| 248 if (BPF_OP(insn->code) != BPF_JA) { | |
| 249 target_instructions.insert(insn->jf_ptr); | |
| 250 ASSERT_TRUE(insn->jf_ptr != NULL); | |
| 251 ASSERT_TRUE(branch_targets.find(insn->jf_ptr) != end); | |
| 252 stack.push_back(insn->jf_ptr); | |
| 253 } | |
| 254 insn = insn->jt_ptr; | |
| 255 } else if (BPF_CLASS(insn->code) == BPF_RET) { | |
| 256 ASSERT_TRUE(insn->next == NULL); | |
| 257 if (stack.empty()) { | |
| 258 break; | |
| 259 } | |
| 260 insn = stack.back(); | |
| 261 stack.pop_back(); | |
| 262 } else { | |
| 263 ASSERT_TRUE(insn->next != NULL); | |
| 264 insn = insn->next; | |
| 265 } | |
| 266 } | |
| 267 ASSERT_TRUE(target_instructions.size() == branch_targets.size()); | |
| 268 | |
| 269 // We can now subtract the set of the branch targets from the set of all | |
| 270 // instructions. This gives us a set with the instructions that nobody | |
| 271 // ever jumps to. Verify that they are no included in the | |
| 272 // "branch_targets" that FindBranchTargets() computed for us. | |
| 273 Instructions non_target_instructions(all_instructions.size() - | |
| 274 target_instructions.size()); | |
| 275 set_difference(all_instructions.begin(), | |
| 276 all_instructions.end(), | |
| 277 target_instructions.begin(), | |
| 278 target_instructions.end(), | |
| 279 non_target_instructions.begin()); | |
| 280 for (Instructions::const_iterator iter = non_target_instructions.begin(); | |
| 281 iter != non_target_instructions.end(); | |
| 282 ++iter) { | |
| 283 ASSERT_TRUE(branch_targets.find(*iter) == end); | |
| 284 } | |
| 285 } | |
| 286 | |
| 287 void CutGraphIntoBasicBlocks(CodeGenUnittestHelper* codegen, | |
| 288 CodeGen::Node prg, | |
| 289 int) { | |
| 290 BranchTargets branch_targets; | |
| 291 codegen->FindBranchTargets(*prg, &branch_targets); | |
| 292 TargetsToBlocks all_blocks; | |
| 293 BasicBlock* first_block = | |
| 294 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks); | |
| 295 ASSERT_TRUE(first_block != NULL); | |
| 296 ASSERT_TRUE(first_block->instructions.size() > 0); | |
| 297 CodeGen::Node first_insn = first_block->instructions[0]; | |
| 298 | |
| 299 // Basic blocks are supposed to start with a branch target and end with | |
| 300 // either a jump or a return instruction. It can also end, if the next | |
| 301 // instruction forms the beginning of a new basic block. There should be | |
| 302 // no other jumps or return instructions in the middle of a basic block. | |
| 303 for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin(); | |
| 304 bb_iter != all_blocks.end(); | |
| 305 ++bb_iter) { | |
| 306 BasicBlock* bb = bb_iter->second; | |
| 307 ASSERT_TRUE(bb != NULL); | |
| 308 ASSERT_TRUE(bb->instructions.size() > 0); | |
| 309 CodeGen::Node insn = bb->instructions[0]; | |
| 310 ASSERT_TRUE(insn == first_insn || | |
| 311 branch_targets.find(insn) != branch_targets.end()); | |
| 312 for (Instructions::const_iterator insn_iter = bb->instructions.begin();;) { | |
| 313 insn = *insn_iter; | |
| 314 if (++insn_iter != bb->instructions.end()) { | |
| 315 ASSERT_TRUE(BPF_CLASS(insn->code) != BPF_JMP); | |
| 316 ASSERT_TRUE(BPF_CLASS(insn->code) != BPF_RET); | |
| 317 } else { | |
| 318 ASSERT_TRUE(BPF_CLASS(insn->code) == BPF_JMP || | |
| 319 BPF_CLASS(insn->code) == BPF_RET || | |
| 320 branch_targets.find(insn->next) != branch_targets.end()); | |
| 321 break; | |
| 322 } | |
| 323 ASSERT_TRUE(branch_targets.find(*insn_iter) == branch_targets.end()); | |
| 324 } | |
| 325 } | |
| 326 } | |
| 327 | |
| 328 void MergeTails(CodeGenUnittestHelper* codegen, CodeGen::Node prg, int flags) { | |
| 329 BranchTargets branch_targets; | |
| 330 codegen->FindBranchTargets(*prg, &branch_targets); | |
| 331 TargetsToBlocks all_blocks; | |
| 332 BasicBlock* first_block = | |
| 333 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks); | |
| 334 | |
| 335 // The shape of our graph and thus the function of our program should | |
| 336 // still be unchanged after we run MergeTails(). We verify this by | |
| 337 // serializing the graph and verifying that it is still the same. | |
| 338 // We also verify that at least some of the edges changed because of | |
| 339 // tail merging. | |
| 340 std::string graph[2]; | |
| 341 std::string edges[2]; | |
| 342 | |
| 343 // The loop executes twice. After the first run, we call MergeTails() on | |
| 344 // our graph. | |
| 345 for (int i = 0;;) { | |
| 346 // Traverse the entire program in depth-first order. | |
| 347 std::vector<BasicBlock*> stack; | |
| 348 for (BasicBlock* bb = first_block;;) { | |
| 349 // Serialize the instructions in this basic block. In general, we only | |
| 350 // need to serialize "code" and "k"; except for a BPF_JA instruction | |
| 351 // where "k" isn't set. | |
| 352 // The stream of instructions should be unchanged after MergeTails(). | |
| 353 for (Instructions::const_iterator iter = bb->instructions.begin(); | |
| 354 iter != bb->instructions.end(); | |
| 355 ++iter) { | |
| 356 graph[i].append(reinterpret_cast<char*>(&(*iter)->code), | |
| 357 sizeof((*iter)->code)); | |
| 358 if (BPF_CLASS((*iter)->code) != BPF_JMP || | |
| 359 BPF_OP((*iter)->code) != BPF_JA) { | |
| 360 graph[i].append(reinterpret_cast<char*>(&(*iter)->k), | |
| 361 sizeof((*iter)->k)); | |
| 362 } | |
| 363 } | |
| 364 | |
| 365 // Also serialize the addresses the basic blocks as we encounter them. | |
| 366 // This will change as basic blocks are coalesed by MergeTails(). | |
| 367 edges[i].append(reinterpret_cast<char*>(&bb), sizeof(bb)); | |
| 368 | |
| 369 // Depth-first traversal of the graph. We only ever need to look at the | |
| 370 // very last instruction in the basic block, as that is the only one that | |
| 371 // can change code flow. | |
| 372 CodeGen::Node insn = bb->instructions.back(); | |
| 373 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
| 374 // For jump instructions, we need to remember the "false" branch while | |
| 375 // traversing the "true" branch. This is not necessary for BPF_JA which | |
| 376 // only has a single branch. | |
| 377 if (BPF_OP(insn->code) != BPF_JA) { | |
| 378 stack.push_back(all_blocks[insn->jf_ptr]); | |
| 379 } | |
| 380 bb = all_blocks[insn->jt_ptr]; | |
| 381 } else if (BPF_CLASS(insn->code) == BPF_RET) { | |
| 382 // After a BPF_RET, see if we need to back track. | |
| 383 if (stack.empty()) { | |
| 384 break; | |
| 385 } | |
| 386 bb = stack.back(); | |
| 387 stack.pop_back(); | |
| 388 } else { | |
| 389 // For "normal" instructions, just follow to the next basic block. | |
| 390 bb = all_blocks[insn->next]; | |
| 391 } | |
| 392 } | |
| 393 | |
| 394 // Our loop runs exactly two times. | |
| 395 if (++i > 1) { | |
| 396 break; | |
| 397 } | |
| 398 codegen->MergeTails(&all_blocks); | |
| 399 } | |
| 400 ASSERT_TRUE(graph[0] == graph[1]); | |
| 401 if (flags & HAS_MERGEABLE_TAILS) { | |
| 402 ASSERT_TRUE(edges[0] != edges[1]); | |
| 403 } else { | |
| 404 ASSERT_TRUE(edges[0] == edges[1]); | |
| 405 } | |
| 406 } | |
| 407 | |
| 408 void CompileAndCompare(CodeGenUnittestHelper* codegen, CodeGen::Node prg, int) { | |
| 409 // TopoSortBasicBlocks() has internal checks that cause it to fail, if it | |
| 410 // detects a problem. Typically, if anything goes wrong, this looks to the | |
| 411 // TopoSort algorithm as if there had been cycles in the input data. | |
| 412 // This provides a pretty good unittest. | |
| 413 // We hand-crafted the program returned by SampleProgram() to exercise | |
| 414 // several of the more interesting code-paths. See comments in | |
| 415 // SampleProgram() for details. | |
| 416 // In addition to relying on the internal consistency checks in the compiler, | |
| 417 // we also serialize the graph and the resulting BPF program and compare | |
| 418 // them. With the exception of BPF_JA instructions that might have been | |
| 419 // inserted, both instruction streams should be equivalent. | |
| 420 // As Compile() modifies the instructions, we have to serialize the graph | |
| 421 // before calling Compile(). | |
| 422 std::string source; | |
| 423 Instructions source_stack; | |
| 424 for (const Instruction* insn = prg, *next; insn; insn = next) { | |
| 425 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
| 426 if (BPF_OP(insn->code) == BPF_JA) { | |
| 427 // Do not serialize BPF_JA instructions (see above). | |
| 428 next = insn->jt_ptr; | |
| 429 continue; | |
| 430 } else { | |
| 431 source_stack.push_back(insn->jf_ptr); | |
| 432 next = insn->jt_ptr; | |
| 433 } | |
| 434 } else if (BPF_CLASS(insn->code) == BPF_RET) { | |
| 435 if (source_stack.empty()) { | |
| 436 next = NULL; | |
| 437 } else { | |
| 438 next = source_stack.back(); | |
| 439 source_stack.pop_back(); | |
| 440 } | |
| 441 } else { | |
| 442 next = insn->next; | |
| 443 } | |
| 444 // Only serialize "code" and "k". That's all the information we need to | |
| 445 // compare. The rest of the information is encoded in the order of | |
| 446 // instructions. | |
| 447 source.append(reinterpret_cast<const char*>(&insn->code), | |
| 448 sizeof(insn->code)); | |
| 449 source.append(reinterpret_cast<const char*>(&insn->k), sizeof(insn->k)); | |
| 450 } | |
| 451 | |
| 452 // Compile the program | |
| 453 CodeGen::Program bpf; | |
| 454 codegen->Compile(prg, &bpf); | |
| 455 | |
| 456 // Serialize the resulting BPF instructions. | |
| 457 std::string assembly; | |
| 458 std::vector<int> assembly_stack; | |
| 459 for (int idx = 0; idx >= 0;) { | |
| 460 ASSERT_TRUE(idx < (int)bpf.size()); | |
| 461 struct sock_filter& insn = bpf[idx]; | |
| 462 if (BPF_CLASS(insn.code) == BPF_JMP) { | |
| 463 if (BPF_OP(insn.code) == BPF_JA) { | |
| 464 // Do not serialize BPF_JA instructions (see above). | |
| 465 idx += insn.k + 1; | |
| 466 continue; | |
| 467 } else { | |
| 468 assembly_stack.push_back(idx + insn.jf + 1); | |
| 469 idx += insn.jt + 1; | |
| 470 } | |
| 471 } else if (BPF_CLASS(insn.code) == BPF_RET) { | |
| 472 if (assembly_stack.empty()) { | |
| 473 idx = -1; | |
| 474 } else { | |
| 475 idx = assembly_stack.back(); | |
| 476 assembly_stack.pop_back(); | |
| 477 } | |
| 478 } else { | |
| 479 ++idx; | |
| 480 } | |
| 481 // Serialize the same information that we serialized before compilation. | |
| 482 assembly.append(reinterpret_cast<char*>(&insn.code), sizeof(insn.code)); | |
| 483 assembly.append(reinterpret_cast<char*>(&insn.k), sizeof(insn.k)); | |
| 484 } | |
| 485 ASSERT_TRUE(source == assembly); | |
| 486 } | |
| 487 | |
| 488 const ProgramTestFunc kProgramTestFuncs[] = { | |
| 489 MakeInstruction, | |
| 490 FindBranchTargets, | |
| 491 CutGraphIntoBasicBlocks, | |
| 492 MergeTails, | |
| 493 CompileAndCompare, | |
| 494 }; | |
| 495 | |
| 496 INSTANTIATE_TEST_CASE_P(CodeGen, | |
| 497 ProgramTest, | |
| 498 ::testing::ValuesIn(kProgramTestFuncs)); | |
| 499 | |
| 500 } // namespace | 331 } // namespace |
| 501 | |
| 502 } // namespace sandbox | 332 } // namespace sandbox |
| OLD | NEW |