OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "sandbox/linux/seccomp-bpf/codegen.h" | 5 #include "sandbox/linux/seccomp-bpf/codegen.h" |
6 | 6 |
7 #include <linux/filter.h> | 7 #include <linux/filter.h> |
8 | 8 |
9 #include <set> | 9 #include <map> |
10 #include <string> | 10 #include <utility> |
11 #include <vector> | 11 #include <vector> |
12 | 12 |
13 #include "sandbox/linux/seccomp-bpf/basicblock.h" | 13 #include "base/macros.h" |
14 #include "sandbox/linux/seccomp-bpf/errorcode.h" | 14 #include "base/md5.h" |
15 #include "sandbox/linux/seccomp-bpf/instruction.h" | 15 #include "base/strings/string_piece.h" |
16 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
17 | 17 |
18 namespace sandbox { | 18 namespace sandbox { |
19 | 19 namespace { |
20 // We want to access some of the private methods in the code generator. We | 20 |
21 // do so by defining a "friend" that makes these methods public for us. | 21 // Hash provides an abstraction for building "hash trees" from BPF |
22 class CodeGenUnittestHelper : public CodeGen { | 22 // control flow graphs, and efficiently identifying equivalent graphs. |
| 23 // |
| 24 // For simplicity, we use MD5, because base happens to provide a |
| 25 // convenient API for its use. However, any collision-resistant hash |
| 26 // should suffice. |
| 27 class Hash { |
23 public: | 28 public: |
24 using CodeGen::CutGraphIntoBasicBlocks; | 29 static const Hash kZero; |
25 using CodeGen::FindBranchTargets; | 30 |
26 using CodeGen::MergeTails; | 31 Hash() : digest_() {} |
| 32 |
| 33 Hash(uint16_t code, |
| 34 uint32_t k, |
| 35 const Hash& jt = kZero, |
| 36 const Hash& jf = kZero) |
| 37 : digest_() { |
| 38 base::MD5Context ctx; |
| 39 base::MD5Init(&ctx); |
| 40 HashValue(&ctx, code); |
| 41 HashValue(&ctx, k); |
| 42 HashValue(&ctx, jt); |
| 43 HashValue(&ctx, jf); |
| 44 base::MD5Final(&digest_, &ctx); |
| 45 } |
| 46 |
| 47 Hash(const Hash& hash) = default; |
| 48 Hash& operator=(const Hash& rhs) = default; |
| 49 |
| 50 friend bool operator==(const Hash& lhs, const Hash& rhs) { |
| 51 return lhs.Base16() == rhs.Base16(); |
| 52 } |
| 53 friend bool operator!=(const Hash& lhs, const Hash& rhs) { |
| 54 return !(lhs == rhs); |
| 55 } |
| 56 |
| 57 private: |
| 58 template <typename T> |
| 59 void HashValue(base::MD5Context* ctx, const T& value) { |
| 60 base::MD5Update(ctx, |
| 61 base::StringPiece(reinterpret_cast<const char*>(&value), |
| 62 sizeof(value))); |
| 63 } |
| 64 |
| 65 std::string Base16() const { |
| 66 return MD5DigestToBase16(digest_); |
| 67 } |
| 68 |
| 69 base::MD5Digest digest_; |
27 }; | 70 }; |
28 | 71 |
29 namespace { | 72 const Hash Hash::kZero; |
30 | 73 |
31 enum { | 74 // Sanity check that equality and inequality work on Hash as required. |
32 NO_FLAGS = 0x0000, | 75 TEST(CodeGen, HashSanity) { |
33 HAS_MERGEABLE_TAILS = 0x0001, | 76 std::vector<Hash> hashes; |
| 77 |
| 78 // Push a bunch of logically distinct hashes. |
| 79 hashes.push_back(Hash::kZero); |
| 80 for (int i = 0; i < 4; ++i) { |
| 81 hashes.push_back(Hash(i & 1, i & 2)); |
| 82 } |
| 83 for (int i = 0; i < 16; ++i) { |
| 84 hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8))); |
| 85 } |
| 86 for (int i = 0; i < 64; ++i) { |
| 87 hashes.push_back( |
| 88 Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32))); |
| 89 } |
| 90 |
| 91 for (const Hash& a : hashes) { |
| 92 for (const Hash& b : hashes) { |
| 93 // Hashes should equal themselves, but not equal all others. |
| 94 if (&a == &b) { |
| 95 EXPECT_EQ(a, b); |
| 96 } else { |
| 97 EXPECT_NE(a, b); |
| 98 } |
| 99 } |
| 100 } |
| 101 } |
| 102 |
| 103 // ProgramTest provides a fixture for writing compiling sample |
| 104 // programs with CodeGen and verifying the linearized output matches |
| 105 // the input DAG. |
| 106 class ProgramTest : public ::testing::Test { |
| 107 protected: |
| 108 ProgramTest() : gen_(), node_hashes_() {} |
| 109 |
| 110 // MakeInstruction calls CodeGen::MakeInstruction() and associated |
| 111 // the returned address with a hash of the instruction. |
| 112 CodeGen::Node MakeInstruction(uint16_t code, |
| 113 uint32_t k, |
| 114 CodeGen::Node jt = CodeGen::kNullNode, |
| 115 CodeGen::Node jf = CodeGen::kNullNode) { |
| 116 CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf); |
| 117 EXPECT_NE(CodeGen::kNullNode, res); |
| 118 |
| 119 Hash digest; |
| 120 if (code == BPF_JMP + BPF_JA) { |
| 121 // TODO(mdempsky): Disallow use of JA. |
| 122 digest = Lookup(jt); |
| 123 } else { |
| 124 digest = Hash(code, k, Lookup(jt), Lookup(jf)); |
| 125 } |
| 126 auto it = node_hashes_.insert(std::make_pair(res, digest)); |
| 127 EXPECT_EQ(digest, it.first->second); |
| 128 |
| 129 return res; |
| 130 } |
| 131 |
| 132 // RunTest compiles the program and verifies that the output matches |
| 133 // what is expected. It should be called at the end of each program |
| 134 // test case. |
| 135 void RunTest(CodeGen::Node head) { |
| 136 // Compile the program |
| 137 CodeGen::Program program; |
| 138 gen_.Compile(head, &program); |
| 139 |
| 140 // Walk the program backwards, and compute the hash for each instruction. |
| 141 std::vector<Hash> prog_hashes(program.size()); |
| 142 for (size_t i = program.size(); i > 0; --i) { |
| 143 const sock_filter& insn = program.at(i - 1); |
| 144 Hash& hash = prog_hashes.at(i - 1); |
| 145 |
| 146 if (BPF_CLASS(insn.code) == BPF_JMP) { |
| 147 if (BPF_OP(insn.code) == BPF_JA) { |
| 148 // The compiler adds JA instructions as needed, so skip them. |
| 149 hash = prog_hashes.at(i + insn.k); |
| 150 } else { |
| 151 hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt), |
| 152 prog_hashes.at(i + insn.jf)); |
| 153 } |
| 154 } else if (BPF_CLASS(insn.code) == BPF_RET) { |
| 155 hash = Hash(insn.code, insn.k); |
| 156 } else { |
| 157 hash = Hash(insn.code, insn.k, prog_hashes.at(i)); |
| 158 } |
| 159 } |
| 160 |
| 161 EXPECT_EQ(Lookup(head), prog_hashes.at(0)); |
| 162 } |
| 163 |
| 164 private: |
| 165 const Hash& Lookup(CodeGen::Node next) const { |
| 166 if (next == CodeGen::kNullNode) { |
| 167 return Hash::kZero; |
| 168 } |
| 169 auto it = node_hashes_.find(next); |
| 170 if (it == node_hashes_.end()) { |
| 171 ADD_FAILURE() << "No hash found for node " << next; |
| 172 return Hash::kZero; |
| 173 } |
| 174 return it->second; |
| 175 } |
| 176 |
| 177 CodeGen gen_; |
| 178 std::map<CodeGen::Node, Hash> node_hashes_; |
| 179 |
| 180 DISALLOW_COPY_AND_ASSIGN(ProgramTest); |
34 }; | 181 }; |
35 | 182 |
36 using ProgramTestFunc = void (*)(CodeGenUnittestHelper* gen, | 183 TEST_F(ProgramTest, OneInstruction) { |
37 Instruction* head, | |
38 int flags); | |
39 | |
40 class ProgramTest : public ::testing::TestWithParam<ProgramTestFunc> { | |
41 protected: | |
42 ProgramTest() : gen_() {} | |
43 | |
44 // RunTest runs the test function argument. It should be called at | |
45 // the end of each program test case. | |
46 void RunTest(Instruction* head, int flags) { GetParam()(&gen_, head, flags); } | |
47 | |
48 Instruction* MakeInstruction(uint16_t code, | |
49 uint32_t k, | |
50 Instruction* next = nullptr) { | |
51 Instruction* ret = gen_.MakeInstruction(code, k, next); | |
52 EXPECT_NE(nullptr, ret); | |
53 EXPECT_EQ(code, ret->code); | |
54 EXPECT_EQ(k, ret->k); | |
55 if (code == BPF_JMP + BPF_JA) { | |
56 // Annoying inconsistency. | |
57 EXPECT_EQ(nullptr, ret->next); | |
58 EXPECT_EQ(next, ret->jt_ptr); | |
59 } else { | |
60 EXPECT_EQ(next, ret->next); | |
61 EXPECT_EQ(nullptr, ret->jt_ptr); | |
62 } | |
63 EXPECT_EQ(nullptr, ret->jf_ptr); | |
64 return ret; | |
65 } | |
66 | |
67 Instruction* MakeInstruction(uint16_t code, | |
68 uint32_t k, | |
69 Instruction* jt, | |
70 Instruction* jf) { | |
71 Instruction* ret = gen_.MakeInstruction(code, k, jt, jf); | |
72 EXPECT_NE(nullptr, ret); | |
73 EXPECT_EQ(code, ret->code); | |
74 EXPECT_EQ(k, ret->k); | |
75 EXPECT_EQ(nullptr, ret->next); | |
76 EXPECT_EQ(jt, ret->jt_ptr); | |
77 EXPECT_EQ(jf, ret->jf_ptr); | |
78 return ret; | |
79 } | |
80 | |
81 private: | |
82 CodeGenUnittestHelper gen_; | |
83 }; | |
84 | |
85 TEST_P(ProgramTest, OneInstruction) { | |
86 // Create the most basic valid BPF program: | 184 // Create the most basic valid BPF program: |
87 // RET 0 | 185 // RET 0 |
88 Instruction* head = MakeInstruction(BPF_RET + BPF_K, 0); | 186 CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0); |
89 RunTest(head, NO_FLAGS); | 187 RunTest(head); |
90 } | 188 } |
91 | 189 |
92 TEST_P(ProgramTest, SimpleBranch) { | 190 TEST_F(ProgramTest, SimpleBranch) { |
93 // Create a program with a single branch: | 191 // Create a program with a single branch: |
94 // JUMP if eq 42 then $0 else $1 | 192 // JUMP if eq 42 then $0 else $1 |
95 // 0: RET 1 | 193 // 0: RET 1 |
96 // 1: RET 0 | 194 // 1: RET 0 |
97 Instruction* head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, | 195 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, |
98 42, | 196 MakeInstruction(BPF_RET + BPF_K, 1), |
99 MakeInstruction(BPF_RET + BPF_K, 1), | 197 MakeInstruction(BPF_RET + BPF_K, 0)); |
100 MakeInstruction(BPF_RET + BPF_K, 0)); | 198 RunTest(head); |
101 RunTest(head, NO_FLAGS); | 199 } |
102 } | 200 |
103 | 201 TEST_F(ProgramTest, AtypicalBranch) { |
104 TEST_P(ProgramTest, AtypicalBranch) { | |
105 // Create a program with a single branch: | 202 // Create a program with a single branch: |
106 // JUMP if eq 42 then $0 else $0 | 203 // JUMP if eq 42 then $0 else $0 |
107 // 0: RET 0 | 204 // 0: RET 0 |
108 | 205 |
109 Instruction* ret = MakeInstruction(BPF_RET + BPF_K, 0); | 206 CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0); |
110 Instruction* head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret); | 207 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret); |
111 | 208 |
112 // N.B.: As the instructions in both sides of the branch are already | 209 // N.B.: As the instructions in both sides of the branch are already |
113 // the same object, we do not actually have any "mergeable" branches. | 210 // the same object, we do not actually have any "mergeable" branches. |
114 // This needs to be reflected in our choice of "flags". | 211 // This needs to be reflected in our choice of "flags". |
115 RunTest(head, NO_FLAGS); | 212 RunTest(head); |
116 } | 213 } |
117 | 214 |
118 TEST_P(ProgramTest, Complex) { | 215 TEST_F(ProgramTest, Complex) { |
119 // Creates a basic BPF program that we'll use to test some of the code: | 216 // Creates a basic BPF program that we'll use to test some of the code: |
120 // JUMP if eq 42 the $0 else $1 (insn6) | 217 // JUMP if eq 42 the $0 else $1 (insn6) |
121 // 0: LD 23 (insn5) | 218 // 0: LD 23 (insn5) |
122 // 1: JUMP if eq 42 then $2 else $4 (insn4) | 219 // 1: JUMP if eq 42 then $2 else $4 (insn4) |
123 // 2: JUMP to $3 (insn2) | 220 // 2: JUMP to $3 (insn2) |
124 // 3: LD 42 (insn1) | 221 // 3: LD 42 (insn1) |
125 // RET 42 (insn0) | 222 // RET 42 (insn0) |
126 // 4: LD 42 (insn3) | 223 // 4: LD 42 (insn3) |
127 // RET 42 (insn3+) | 224 // RET 42 (insn3+) |
128 Instruction* insn0 = MakeInstruction(BPF_RET + BPF_K, 42); | 225 CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42); |
129 Instruction* insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0); | 226 CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0); |
130 Instruction* insn2 = MakeInstruction(BPF_JMP + BPF_JA, 0, insn1); | 227 CodeGen::Node insn2 = MakeInstruction(BPF_JMP + BPF_JA, 0, insn1); |
131 | 228 |
132 // We explicitly duplicate instructions so that MergeTails() can coalesce | 229 // We explicitly duplicate instructions so that MergeTails() can coalesce |
133 // them later. | 230 // them later. |
134 Instruction* insn3 = MakeInstruction( | 231 CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, |
135 BPF_LD + BPF_W + BPF_ABS, 42, MakeInstruction(BPF_RET + BPF_K, 42)); | 232 MakeInstruction(BPF_RET + BPF_K, 42)); |
136 | 233 |
137 Instruction* insn4 = | 234 CodeGen::Node insn4 = |
138 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3); | 235 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3); |
139 Instruction* insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4); | 236 CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4); |
140 | 237 |
141 // Force a basic block that ends in neither a jump instruction nor a return | 238 // Force a basic block that ends in neither a jump instruction nor a return |
142 // instruction. It only contains "insn5". This exercises one of the less | 239 // instruction. It only contains "insn5". This exercises one of the less |
143 // common code paths in the topo-sort algorithm. | 240 // common code paths in the topo-sort algorithm. |
144 // This also gives us a diamond-shaped pattern in our graph, which stresses | 241 // This also gives us a diamond-shaped pattern in our graph, which stresses |
145 // another aspect of the topo-sort algorithm (namely, the ability to | 242 // another aspect of the topo-sort algorithm (namely, the ability to |
146 // correctly count the incoming branches for subtrees that are not disjunct). | 243 // correctly count the incoming branches for subtrees that are not disjunct). |
147 Instruction* insn6 = | 244 CodeGen::Node insn6 = |
148 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4); | 245 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4); |
149 | 246 |
150 RunTest(insn6, HAS_MERGEABLE_TAILS); | 247 RunTest(insn6); |
151 } | 248 } |
152 | 249 |
153 TEST_P(ProgramTest, ConfusingTails) { | 250 TEST_F(ProgramTest, ConfusingTails) { |
154 // This simple program demonstrates https://crbug.com/351103/ | 251 // This simple program demonstrates https://crbug.com/351103/ |
155 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could | 252 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could |
156 // be tempted to merge them since they are the same. However, they are | 253 // be tempted to merge them since they are the same. However, they are |
157 // not mergeable because they fall-through to non semantically equivalent | 254 // not mergeable because they fall-through to non semantically equivalent |
158 // blocks. | 255 // blocks. |
159 // Without the fix for this bug, this program should trigger the check in | 256 // Without the fix for this bug, this program should trigger the check in |
160 // CompileAndCompare: the serialized graphs from the program and its compiled | 257 // CompileAndCompare: the serialized graphs from the program and its compiled |
161 // version will differ. | 258 // version will differ. |
162 // | 259 // |
163 // 0) LOAD 1 // ??? | 260 // 0) LOAD 1 // ??? |
164 // 1) if A == 0x1; then JMP 2 else JMP 3 | 261 // 1) if A == 0x1; then JMP 2 else JMP 3 |
165 // 2) LOAD 0 // System call number | 262 // 2) LOAD 0 // System call number |
166 // 3) if A == 0x2; then JMP 4 else JMP 5 | 263 // 3) if A == 0x2; then JMP 4 else JMP 5 |
167 // 4) LOAD 0 // System call number | 264 // 4) LOAD 0 // System call number |
168 // 5) if A == 0x1; then JMP 6 else JMP 7 | 265 // 5) if A == 0x1; then JMP 6 else JMP 7 |
169 // 6) RET 0 | 266 // 6) RET 0 |
170 // 7) RET 1 | 267 // 7) RET 1 |
171 | 268 |
172 Instruction* i7 = MakeInstruction(BPF_RET + BPF_K, 1); | 269 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); |
173 Instruction* i6 = MakeInstruction(BPF_RET + BPF_K, 0); | 270 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); |
174 Instruction* i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); | 271 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); |
175 Instruction* i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); | 272 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); |
176 Instruction* i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | 273 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); |
177 Instruction* i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); | 274 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); |
178 Instruction* i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | 275 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); |
179 Instruction* i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | 276 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); |
180 | 277 |
181 RunTest(i0, NO_FLAGS); | 278 RunTest(i0); |
182 } | 279 } |
183 | 280 |
184 TEST_P(ProgramTest, ConfusingTailsBasic) { | 281 TEST_F(ProgramTest, ConfusingTailsBasic) { |
185 // Without the fix for https://crbug.com/351103/, (see | 282 // Without the fix for https://crbug.com/351103/, (see |
186 // SampleProgramConfusingTails()), this would generate a cyclic graph and | 283 // SampleProgramConfusingTails()), this would generate a cyclic graph and |
187 // crash as the two "LOAD 0" instructions would get merged. | 284 // crash as the two "LOAD 0" instructions would get merged. |
188 // | 285 // |
189 // 0) LOAD 1 // ??? | 286 // 0) LOAD 1 // ??? |
190 // 1) if A == 0x1; then JMP 2 else JMP 3 | 287 // 1) if A == 0x1; then JMP 2 else JMP 3 |
191 // 2) LOAD 0 // System call number | 288 // 2) LOAD 0 // System call number |
192 // 3) if A == 0x2; then JMP 4 else JMP 5 | 289 // 3) if A == 0x2; then JMP 4 else JMP 5 |
193 // 4) LOAD 0 // System call number | 290 // 4) LOAD 0 // System call number |
194 // 5) RET 1 | 291 // 5) RET 1 |
195 | 292 |
196 Instruction* i5 = MakeInstruction(BPF_RET + BPF_K, 1); | 293 CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1); |
197 Instruction* i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); | 294 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); |
198 Instruction* i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | 295 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); |
199 Instruction* i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); | 296 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); |
200 Instruction* i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | 297 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); |
201 Instruction* i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | 298 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); |
202 | 299 |
203 RunTest(i0, NO_FLAGS); | 300 RunTest(i0); |
204 } | 301 } |
205 | 302 |
206 TEST_P(ProgramTest, ConfusingTailsMergeable) { | 303 TEST_F(ProgramTest, ConfusingTailsMergeable) { |
207 // This is similar to SampleProgramConfusingTails(), except that | 304 // This is similar to SampleProgramConfusingTails(), except that |
208 // instructions 2 and 4 are now RET instructions. | 305 // instructions 2 and 4 are now RET instructions. |
209 // In PointerCompare(), this exercises the path where two blocks are of the | 306 // In PointerCompare(), this exercises the path where two blocks are of the |
210 // same length and identical and the last instruction is a JMP or RET, so the | 307 // same length and identical and the last instruction is a JMP or RET, so the |
211 // following blocks don't need to be looked at and the blocks are mergeable. | 308 // following blocks don't need to be looked at and the blocks are mergeable. |
212 // | 309 // |
213 // 0) LOAD 1 // ??? | 310 // 0) LOAD 1 // ??? |
214 // 1) if A == 0x1; then JMP 2 else JMP 3 | 311 // 1) if A == 0x1; then JMP 2 else JMP 3 |
215 // 2) RET 42 | 312 // 2) RET 42 |
216 // 3) if A == 0x2; then JMP 4 else JMP 5 | 313 // 3) if A == 0x2; then JMP 4 else JMP 5 |
217 // 4) RET 42 | 314 // 4) RET 42 |
218 // 5) if A == 0x1; then JMP 6 else JMP 7 | 315 // 5) if A == 0x1; then JMP 6 else JMP 7 |
219 // 6) RET 0 | 316 // 6) RET 0 |
220 // 7) RET 1 | 317 // 7) RET 1 |
221 | 318 |
222 Instruction* i7 = MakeInstruction(BPF_RET + BPF_K, 1); | 319 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); |
223 Instruction* i6 = MakeInstruction(BPF_RET + BPF_K, 0); | 320 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); |
224 Instruction* i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); | 321 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); |
225 Instruction* i4 = MakeInstruction(BPF_RET + BPF_K, 42); | 322 CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42); |
226 Instruction* i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | 323 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); |
227 Instruction* i2 = MakeInstruction(BPF_RET + BPF_K, 42); | 324 CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42); |
228 Instruction* i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | 325 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); |
229 Instruction* i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | 326 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); |
230 | 327 |
231 RunTest(i0, HAS_MERGEABLE_TAILS); | 328 RunTest(i0); |
232 } | 329 } |
233 | 330 |
234 void MakeInstruction(CodeGenUnittestHelper* codegen, | |
235 Instruction* program, int) { | |
236 // Nothing to do here | |
237 } | |
238 | |
239 void FindBranchTargets(CodeGenUnittestHelper* codegen, Instruction* prg, int) { | |
240 BranchTargets branch_targets; | |
241 codegen->FindBranchTargets(*prg, &branch_targets); | |
242 | |
243 // Verifying the general properties that should be true for every | |
244 // well-formed BPF program. | |
245 // Perform a depth-first traversal of the BPF program an verify that all | |
246 // targets of BPF_JMP instructions are represented in the "branch_targets". | |
247 // At the same time, compute a set of both the branch targets and all the | |
248 // instructions in the program. | |
249 std::vector<Instruction*> stack; | |
250 std::set<Instruction*> all_instructions; | |
251 std::set<Instruction*> target_instructions; | |
252 BranchTargets::const_iterator end = branch_targets.end(); | |
253 for (Instruction* insn = prg;;) { | |
254 all_instructions.insert(insn); | |
255 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
256 target_instructions.insert(insn->jt_ptr); | |
257 ASSERT_TRUE(insn->jt_ptr != NULL); | |
258 ASSERT_TRUE(branch_targets.find(insn->jt_ptr) != end); | |
259 if (BPF_OP(insn->code) != BPF_JA) { | |
260 target_instructions.insert(insn->jf_ptr); | |
261 ASSERT_TRUE(insn->jf_ptr != NULL); | |
262 ASSERT_TRUE(branch_targets.find(insn->jf_ptr) != end); | |
263 stack.push_back(insn->jf_ptr); | |
264 } | |
265 insn = insn->jt_ptr; | |
266 } else if (BPF_CLASS(insn->code) == BPF_RET) { | |
267 ASSERT_TRUE(insn->next == NULL); | |
268 if (stack.empty()) { | |
269 break; | |
270 } | |
271 insn = stack.back(); | |
272 stack.pop_back(); | |
273 } else { | |
274 ASSERT_TRUE(insn->next != NULL); | |
275 insn = insn->next; | |
276 } | |
277 } | |
278 ASSERT_TRUE(target_instructions.size() == branch_targets.size()); | |
279 | |
280 // We can now subtract the set of the branch targets from the set of all | |
281 // instructions. This gives us a set with the instructions that nobody | |
282 // ever jumps to. Verify that they are no included in the | |
283 // "branch_targets" that FindBranchTargets() computed for us. | |
284 Instructions non_target_instructions(all_instructions.size() - | |
285 target_instructions.size()); | |
286 set_difference(all_instructions.begin(), | |
287 all_instructions.end(), | |
288 target_instructions.begin(), | |
289 target_instructions.end(), | |
290 non_target_instructions.begin()); | |
291 for (Instructions::const_iterator iter = non_target_instructions.begin(); | |
292 iter != non_target_instructions.end(); | |
293 ++iter) { | |
294 ASSERT_TRUE(branch_targets.find(*iter) == end); | |
295 } | |
296 } | |
297 | |
298 void CutGraphIntoBasicBlocks(CodeGenUnittestHelper* codegen, | |
299 Instruction* prg, | |
300 int) { | |
301 BranchTargets branch_targets; | |
302 codegen->FindBranchTargets(*prg, &branch_targets); | |
303 TargetsToBlocks all_blocks; | |
304 BasicBlock* first_block = | |
305 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks); | |
306 ASSERT_TRUE(first_block != NULL); | |
307 ASSERT_TRUE(first_block->instructions.size() > 0); | |
308 Instruction* first_insn = first_block->instructions[0]; | |
309 | |
310 // Basic blocks are supposed to start with a branch target and end with | |
311 // either a jump or a return instruction. It can also end, if the next | |
312 // instruction forms the beginning of a new basic block. There should be | |
313 // no other jumps or return instructions in the middle of a basic block. | |
314 for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin(); | |
315 bb_iter != all_blocks.end(); | |
316 ++bb_iter) { | |
317 BasicBlock* bb = bb_iter->second; | |
318 ASSERT_TRUE(bb != NULL); | |
319 ASSERT_TRUE(bb->instructions.size() > 0); | |
320 Instruction* insn = bb->instructions[0]; | |
321 ASSERT_TRUE(insn == first_insn || | |
322 branch_targets.find(insn) != branch_targets.end()); | |
323 for (Instructions::const_iterator insn_iter = bb->instructions.begin();;) { | |
324 insn = *insn_iter; | |
325 if (++insn_iter != bb->instructions.end()) { | |
326 ASSERT_TRUE(BPF_CLASS(insn->code) != BPF_JMP); | |
327 ASSERT_TRUE(BPF_CLASS(insn->code) != BPF_RET); | |
328 } else { | |
329 ASSERT_TRUE(BPF_CLASS(insn->code) == BPF_JMP || | |
330 BPF_CLASS(insn->code) == BPF_RET || | |
331 branch_targets.find(insn->next) != branch_targets.end()); | |
332 break; | |
333 } | |
334 ASSERT_TRUE(branch_targets.find(*insn_iter) == branch_targets.end()); | |
335 } | |
336 } | |
337 } | |
338 | |
339 void MergeTails(CodeGenUnittestHelper* codegen, Instruction* prg, int flags) { | |
340 BranchTargets branch_targets; | |
341 codegen->FindBranchTargets(*prg, &branch_targets); | |
342 TargetsToBlocks all_blocks; | |
343 BasicBlock* first_block = | |
344 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks); | |
345 | |
346 // The shape of our graph and thus the function of our program should | |
347 // still be unchanged after we run MergeTails(). We verify this by | |
348 // serializing the graph and verifying that it is still the same. | |
349 // We also verify that at least some of the edges changed because of | |
350 // tail merging. | |
351 std::string graph[2]; | |
352 std::string edges[2]; | |
353 | |
354 // The loop executes twice. After the first run, we call MergeTails() on | |
355 // our graph. | |
356 for (int i = 0;;) { | |
357 // Traverse the entire program in depth-first order. | |
358 std::vector<BasicBlock*> stack; | |
359 for (BasicBlock* bb = first_block;;) { | |
360 // Serialize the instructions in this basic block. In general, we only | |
361 // need to serialize "code" and "k"; except for a BPF_JA instruction | |
362 // where "k" isn't set. | |
363 // The stream of instructions should be unchanged after MergeTails(). | |
364 for (Instructions::const_iterator iter = bb->instructions.begin(); | |
365 iter != bb->instructions.end(); | |
366 ++iter) { | |
367 graph[i].append(reinterpret_cast<char*>(&(*iter)->code), | |
368 sizeof((*iter)->code)); | |
369 if (BPF_CLASS((*iter)->code) != BPF_JMP || | |
370 BPF_OP((*iter)->code) != BPF_JA) { | |
371 graph[i].append(reinterpret_cast<char*>(&(*iter)->k), | |
372 sizeof((*iter)->k)); | |
373 } | |
374 } | |
375 | |
376 // Also serialize the addresses the basic blocks as we encounter them. | |
377 // This will change as basic blocks are coalesed by MergeTails(). | |
378 edges[i].append(reinterpret_cast<char*>(&bb), sizeof(bb)); | |
379 | |
380 // Depth-first traversal of the graph. We only ever need to look at the | |
381 // very last instruction in the basic block, as that is the only one that | |
382 // can change code flow. | |
383 Instruction* insn = bb->instructions.back(); | |
384 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
385 // For jump instructions, we need to remember the "false" branch while | |
386 // traversing the "true" branch. This is not necessary for BPF_JA which | |
387 // only has a single branch. | |
388 if (BPF_OP(insn->code) != BPF_JA) { | |
389 stack.push_back(all_blocks[insn->jf_ptr]); | |
390 } | |
391 bb = all_blocks[insn->jt_ptr]; | |
392 } else if (BPF_CLASS(insn->code) == BPF_RET) { | |
393 // After a BPF_RET, see if we need to back track. | |
394 if (stack.empty()) { | |
395 break; | |
396 } | |
397 bb = stack.back(); | |
398 stack.pop_back(); | |
399 } else { | |
400 // For "normal" instructions, just follow to the next basic block. | |
401 bb = all_blocks[insn->next]; | |
402 } | |
403 } | |
404 | |
405 // Our loop runs exactly two times. | |
406 if (++i > 1) { | |
407 break; | |
408 } | |
409 codegen->MergeTails(&all_blocks); | |
410 } | |
411 ASSERT_TRUE(graph[0] == graph[1]); | |
412 if (flags & HAS_MERGEABLE_TAILS) { | |
413 ASSERT_TRUE(edges[0] != edges[1]); | |
414 } else { | |
415 ASSERT_TRUE(edges[0] == edges[1]); | |
416 } | |
417 } | |
418 | |
419 void CompileAndCompare(CodeGenUnittestHelper* codegen, Instruction* prg, int) { | |
420 // TopoSortBasicBlocks() has internal checks that cause it to fail, if it | |
421 // detects a problem. Typically, if anything goes wrong, this looks to the | |
422 // TopoSort algorithm as if there had been cycles in the input data. | |
423 // This provides a pretty good unittest. | |
424 // We hand-crafted the program returned by SampleProgram() to exercise | |
425 // several of the more interesting code-paths. See comments in | |
426 // SampleProgram() for details. | |
427 // In addition to relying on the internal consistency checks in the compiler, | |
428 // we also serialize the graph and the resulting BPF program and compare | |
429 // them. With the exception of BPF_JA instructions that might have been | |
430 // inserted, both instruction streams should be equivalent. | |
431 // As Compile() modifies the instructions, we have to serialize the graph | |
432 // before calling Compile(). | |
433 std::string source; | |
434 Instructions source_stack; | |
435 for (const Instruction* insn = prg, *next; insn; insn = next) { | |
436 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
437 if (BPF_OP(insn->code) == BPF_JA) { | |
438 // Do not serialize BPF_JA instructions (see above). | |
439 next = insn->jt_ptr; | |
440 continue; | |
441 } else { | |
442 source_stack.push_back(insn->jf_ptr); | |
443 next = insn->jt_ptr; | |
444 } | |
445 } else if (BPF_CLASS(insn->code) == BPF_RET) { | |
446 if (source_stack.empty()) { | |
447 next = NULL; | |
448 } else { | |
449 next = source_stack.back(); | |
450 source_stack.pop_back(); | |
451 } | |
452 } else { | |
453 next = insn->next; | |
454 } | |
455 // Only serialize "code" and "k". That's all the information we need to | |
456 // compare. The rest of the information is encoded in the order of | |
457 // instructions. | |
458 source.append(reinterpret_cast<const char*>(&insn->code), | |
459 sizeof(insn->code)); | |
460 source.append(reinterpret_cast<const char*>(&insn->k), sizeof(insn->k)); | |
461 } | |
462 | |
463 // Compile the program | |
464 CodeGen::Program bpf; | |
465 codegen->Compile(prg, &bpf); | |
466 | |
467 // Serialize the resulting BPF instructions. | |
468 std::string assembly; | |
469 std::vector<int> assembly_stack; | |
470 for (int idx = 0; idx >= 0;) { | |
471 ASSERT_TRUE(idx < (int)bpf.size()); | |
472 struct sock_filter& insn = bpf[idx]; | |
473 if (BPF_CLASS(insn.code) == BPF_JMP) { | |
474 if (BPF_OP(insn.code) == BPF_JA) { | |
475 // Do not serialize BPF_JA instructions (see above). | |
476 idx += insn.k + 1; | |
477 continue; | |
478 } else { | |
479 assembly_stack.push_back(idx + insn.jf + 1); | |
480 idx += insn.jt + 1; | |
481 } | |
482 } else if (BPF_CLASS(insn.code) == BPF_RET) { | |
483 if (assembly_stack.empty()) { | |
484 idx = -1; | |
485 } else { | |
486 idx = assembly_stack.back(); | |
487 assembly_stack.pop_back(); | |
488 } | |
489 } else { | |
490 ++idx; | |
491 } | |
492 // Serialize the same information that we serialized before compilation. | |
493 assembly.append(reinterpret_cast<char*>(&insn.code), sizeof(insn.code)); | |
494 assembly.append(reinterpret_cast<char*>(&insn.k), sizeof(insn.k)); | |
495 } | |
496 ASSERT_TRUE(source == assembly); | |
497 } | |
498 | |
499 const ProgramTestFunc kProgramTestFuncs[] = { | |
500 MakeInstruction, | |
501 FindBranchTargets, | |
502 CutGraphIntoBasicBlocks, | |
503 MergeTails, | |
504 CompileAndCompare, | |
505 }; | |
506 | |
507 INSTANTIATE_TEST_CASE_P(CodeGen, | |
508 ProgramTest, | |
509 ::testing::ValuesIn(kProgramTestFuncs)); | |
510 | |
511 } // namespace | 331 } // namespace |
512 | |
513 } // namespace sandbox | 332 } // namespace sandbox |
OLD | NEW |