OLD | NEW |
| (Empty) |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "third_party/courgette/encoded_program.h" | |
6 | |
7 #include <algorithm> | |
8 #include <map> | |
9 #include <string> | |
10 #include <vector> | |
11 | |
12 #include "base/logging.h" | |
13 #include "base/sys_info.h" | |
14 | |
15 #include "third_party/courgette/courgette.h" | |
16 #include "third_party/courgette/streams.h" | |
17 | |
18 namespace courgette { | |
19 | |
20 // Stream indexes. | |
21 const int kStreamMisc = 0; | |
22 const int kStreamOps = 1; | |
23 const int kStreamBytes = 2; | |
24 const int kStreamAbs32Indexes = 3; | |
25 const int kStreamRel32Indexes = 4; | |
26 const int kStreamAbs32Addresses = 5; | |
27 const int kStreamRel32Addresses = 6; | |
28 const int kStreamCopyCounts = 7; | |
29 const int kStreamOriginAddresses = kStreamMisc; | |
30 | |
31 const int kStreamLimit = 9; | |
32 | |
33 // Binary assembly language operations. | |
34 enum EncodedProgram::OP { | |
35 ORIGIN, // ORIGIN <rva> - set address for subsequent assembly. | |
36 COPY, // COPY <count> <bytes> - copy bytes to output. | |
37 COPY1, // COPY1 <byte> - same as COPY 1 <byte>. | |
38 REL32, // REL32 <index> - emit rel32 encoded reference to address at | |
39 // address table offset <index> | |
40 ABS32, // ABS32 <index> - emit abs32 encoded reference to address at | |
41 // address table offset <index> | |
42 MAKE_BASE_RELOCATION_TABLE, // Emit base relocation table blocks. | |
43 OP_LAST | |
44 }; | |
45 | |
46 | |
47 // Constructor is here rather than in the header. Although the constructor | |
48 // appears to do nothing it is fact quite large because of the implict calls to | |
49 // field constructors. Ditto for the destructor. | |
50 EncodedProgram::EncodedProgram() {} | |
51 EncodedProgram::~EncodedProgram() {} | |
52 | |
53 // Serializes a vector of integral values using Varint32 coding. | |
54 template<typename T> | |
55 void WriteVector(const std::vector<T>& items, SinkStream* buffer) { | |
56 size_t count = items.size(); | |
57 buffer->WriteVarint32(count); | |
58 for (size_t i = 0; i < count; ++i) { | |
59 COMPILE_ASSERT(sizeof(T) <= sizeof(uint32), T_must_fit_in_uint32); | |
60 buffer->WriteVarint32(static_cast<uint32>(items[i])); | |
61 } | |
62 } | |
63 | |
64 template<typename T> | |
65 bool ReadVector(std::vector<T>* items, SourceStream* buffer) { | |
66 uint32 count; | |
67 if (!buffer->ReadVarint32(&count)) | |
68 return false; | |
69 | |
70 items->clear(); | |
71 items->reserve(count); | |
72 for (size_t i = 0; i < count; ++i) { | |
73 uint32 item; | |
74 if (!buffer->ReadVarint32(&item)) | |
75 return false; | |
76 items->push_back(static_cast<T>(item)); | |
77 } | |
78 | |
79 return true; | |
80 } | |
81 | |
82 // Serializes a vector, using delta coding followed by Varint32 coding. | |
83 void WriteU32Delta(const std::vector<uint32>& set, SinkStream* buffer) { | |
84 size_t count = set.size(); | |
85 buffer->WriteVarint32(count); | |
86 uint32 prev = 0; | |
87 for (size_t i = 0; i < count; ++i) { | |
88 uint32 current = set[i]; | |
89 uint32 delta = current - prev; | |
90 buffer->WriteVarint32(delta); | |
91 prev = current; | |
92 } | |
93 } | |
94 | |
95 static bool ReadU32Delta(std::vector<uint32>* set, SourceStream* buffer) { | |
96 uint32 count; | |
97 | |
98 if (!buffer->ReadVarint32(&count)) | |
99 return false; | |
100 | |
101 set->clear(); | |
102 set->reserve(count); | |
103 uint32 prev = 0; | |
104 | |
105 for (size_t i = 0; i < count; ++i) { | |
106 uint32 delta; | |
107 if (!buffer->ReadVarint32(&delta)) | |
108 return false; | |
109 uint32 current = prev + delta; | |
110 set->push_back(current); | |
111 prev = current; | |
112 } | |
113 | |
114 return true; | |
115 } | |
116 | |
117 // Write a vector as the byte representation of the contents. | |
118 // | |
119 // (This only really makes sense for a type T that has sizeof(T)==1, otherwise | |
120 // serilized representation is not endian-agnositic. But it is useful to keep | |
121 // the possibility of a greater size for experiments comparing Varint32 encoding | |
122 // of a vector of larger integrals vs a plain form.) | |
123 // | |
124 template<typename T> | |
125 void WriteVectorU8(const std::vector<T>& items, SinkStream* buffer) { | |
126 uint32 count = items.size(); | |
127 buffer->WriteVarint32(count); | |
128 if (count != 0) { | |
129 size_t byte_count = count * sizeof(T); | |
130 buffer->Write(static_cast<const void*>(&items[0]), byte_count); | |
131 } | |
132 } | |
133 | |
134 template<typename T> | |
135 bool ReadVectorU8(std::vector<T>* items, SourceStream* buffer) { | |
136 uint32 count; | |
137 if (!buffer->ReadVarint32(&count)) | |
138 return false; | |
139 | |
140 items->clear(); | |
141 items->resize(count); | |
142 if (count != 0) { | |
143 size_t byte_count = count * sizeof(T); | |
144 return buffer->Read(static_cast<void*>(&((*items)[0])), byte_count); | |
145 } | |
146 return true; | |
147 } | |
148 | |
149 //////////////////////////////////////////////////////////////////////////////// | |
150 | |
151 void EncodedProgram::DefineRel32Label(int index, RVA value) { | |
152 DefineLabelCommon(&rel32_rva_, index, value); | |
153 } | |
154 | |
155 void EncodedProgram::DefineAbs32Label(int index, RVA value) { | |
156 DefineLabelCommon(&abs32_rva_, index, value); | |
157 } | |
158 | |
159 static const RVA kUnassignedRVA = static_cast<RVA>(-1); | |
160 | |
161 void EncodedProgram::DefineLabelCommon(std::vector<RVA>* rvas, | |
162 int index, | |
163 RVA rva) { | |
164 if (static_cast<int>(rvas->size()) <= index) { | |
165 rvas->resize(index + 1, kUnassignedRVA); | |
166 } | |
167 if ((*rvas)[index] != kUnassignedRVA) { | |
168 NOTREACHED() << "DefineLabel double assigned " << index; | |
169 } | |
170 (*rvas)[index] = rva; | |
171 } | |
172 | |
173 void EncodedProgram::EndLabels() { | |
174 FinishLabelsCommon(&abs32_rva_); | |
175 FinishLabelsCommon(&rel32_rva_); | |
176 } | |
177 | |
178 void EncodedProgram::FinishLabelsCommon(std::vector<RVA>* rvas) { | |
179 // Replace all unassigned slots with the value at the previous index so they | |
180 // delta-encode to zero. (There might be better values than zero. The way to | |
181 // get that is have the higher level assembly program assign the unassigned | |
182 // slots.) | |
183 RVA previous = 0; | |
184 size_t size = rvas->size(); | |
185 for (size_t i = 0; i < size; ++i) { | |
186 if ((*rvas)[i] == kUnassignedRVA) | |
187 (*rvas)[i] = previous; | |
188 else | |
189 previous = (*rvas)[i]; | |
190 } | |
191 } | |
192 | |
193 void EncodedProgram::AddOrigin(RVA origin) { | |
194 ops_.push_back(ORIGIN); | |
195 origins_.push_back(origin); | |
196 } | |
197 | |
198 void EncodedProgram::AddCopy(int count, const void* bytes) { | |
199 const uint8* source = static_cast<const uint8*>(bytes); | |
200 | |
201 // Fold adjacent COPY instructions into one. This nearly halves the size of | |
202 // an EncodedProgram with only COPY1 instructions since there are approx plain | |
203 // 16 bytes per reloc. This has a working-set benefit during decompression. | |
204 // For compression of files with large differences this makes a small (4%) | |
205 // improvement in size. For files with small differences this degrades the | |
206 // compressed size by 1.3% | |
207 if (ops_.size() > 0) { | |
208 if (ops_.back() == COPY1) { | |
209 ops_.back() = COPY; | |
210 copy_counts_.push_back(1); | |
211 } | |
212 if (ops_.back() == COPY) { | |
213 copy_counts_.back() += count; | |
214 for (int i = 0; i < count; ++i) { | |
215 copy_bytes_.push_back(source[i]); | |
216 } | |
217 return; | |
218 } | |
219 } | |
220 | |
221 if (count == 1) { | |
222 ops_.push_back(COPY1); | |
223 copy_bytes_.push_back(source[0]); | |
224 } else { | |
225 ops_.push_back(COPY); | |
226 copy_counts_.push_back(count); | |
227 for (int i = 0; i < count; ++i) { | |
228 copy_bytes_.push_back(source[i]); | |
229 } | |
230 } | |
231 } | |
232 | |
233 void EncodedProgram::AddAbs32(int label_index) { | |
234 ops_.push_back(ABS32); | |
235 abs32_ix_.push_back(label_index); | |
236 } | |
237 | |
238 void EncodedProgram::AddRel32(int label_index) { | |
239 ops_.push_back(REL32); | |
240 rel32_ix_.push_back(label_index); | |
241 } | |
242 | |
243 void EncodedProgram::AddMakeRelocs() { | |
244 ops_.push_back(MAKE_BASE_RELOCATION_TABLE); | |
245 } | |
246 | |
247 void EncodedProgram::DebuggingSummary() { | |
248 LOG(INFO) << "EncodedProgram Summary"; | |
249 LOG(INFO) << " image base " << image_base_; | |
250 LOG(INFO) << " abs32 rvas " << abs32_rva_.size(); | |
251 LOG(INFO) << " rel32 rvas " << rel32_rva_.size(); | |
252 LOG(INFO) << " ops " << ops_.size(); | |
253 LOG(INFO) << " origins " << origins_.size(); | |
254 LOG(INFO) << " copy_counts " << copy_counts_.size(); | |
255 LOG(INFO) << " copy_bytes " << copy_bytes_.size(); | |
256 LOG(INFO) << " abs32_ix " << abs32_ix_.size(); | |
257 LOG(INFO) << " rel32_ix " << rel32_ix_.size(); | |
258 } | |
259 | |
260 //////////////////////////////////////////////////////////////////////////////// | |
261 | |
262 // For algorithm refinement purposes it is useful to write subsets of the file | |
263 // format. This gives us the ability to estimate the entropy of the | |
264 // differential compression of the individual streams, which can provide | |
265 // invaluable insights. The default, of course, is to include all the streams. | |
266 // | |
267 enum FieldSelect { | |
268 INCLUDE_ABS32_ADDRESSES = 0x0001, | |
269 INCLUDE_REL32_ADDRESSES = 0x0002, | |
270 INCLUDE_ABS32_INDEXES = 0x0010, | |
271 INCLUDE_REL32_INDEXES = 0x0020, | |
272 INCLUDE_OPS = 0x0100, | |
273 INCLUDE_BYTES = 0x0200, | |
274 INCLUDE_COPY_COUNTS = 0x0400, | |
275 INCLUDE_MISC = 0x1000 | |
276 }; | |
277 | |
278 static FieldSelect GetFieldSelect() { | |
279 #if 1 | |
280 // TODO(sra): Use better configuration. | |
281 std::wstring s = base::SysInfo::GetEnvVar(L"A_FIELDS"); | |
282 if (!s.empty()) { | |
283 return static_cast<FieldSelect>(wcstoul(s.c_str(), 0, 0)); | |
284 } | |
285 #endif | |
286 return static_cast<FieldSelect>(~0); | |
287 } | |
288 | |
289 void EncodedProgram::WriteTo(SinkStreamSet* streams) { | |
290 FieldSelect select = GetFieldSelect(); | |
291 | |
292 // The order of fields must be consistent in WriteTo and ReadFrom, regardless | |
293 // of the streams used. The code can be configured with all kStreamXXX | |
294 // constants the same. | |
295 // | |
296 // If we change the code to pipeline reading with assembly (to avoid temporary | |
297 // storage vectors by consuming operands directly from the stream) then we | |
298 // need to read the base address and the random access address tables first, | |
299 // the rest can be interleaved. | |
300 | |
301 if (select & INCLUDE_MISC) { | |
302 // TODO(sra): write 64 bits. | |
303 streams->stream(kStreamMisc)->WriteVarint32( | |
304 static_cast<uint32>(image_base_)); | |
305 } | |
306 | |
307 if (select & INCLUDE_ABS32_ADDRESSES) | |
308 WriteU32Delta(abs32_rva_, streams->stream(kStreamAbs32Addresses)); | |
309 if (select & INCLUDE_REL32_ADDRESSES) | |
310 WriteU32Delta(rel32_rva_, streams->stream(kStreamRel32Addresses)); | |
311 if (select & INCLUDE_MISC) | |
312 WriteVector(origins_, streams->stream(kStreamOriginAddresses)); | |
313 if (select & INCLUDE_OPS) { | |
314 streams->stream(kStreamOps)->Reserve(ops_.size() + 5); // 5 for length. | |
315 WriteVector(ops_, streams->stream(kStreamOps)); | |
316 } | |
317 if (select & INCLUDE_COPY_COUNTS) | |
318 WriteVector(copy_counts_, streams->stream(kStreamCopyCounts)); | |
319 if (select & INCLUDE_BYTES) | |
320 WriteVectorU8(copy_bytes_, streams->stream(kStreamBytes)); | |
321 if (select & INCLUDE_ABS32_INDEXES) | |
322 WriteVector(abs32_ix_, streams->stream(kStreamAbs32Indexes)); | |
323 if (select & INCLUDE_REL32_INDEXES) | |
324 WriteVector(rel32_ix_, streams->stream(kStreamRel32Indexes)); | |
325 } | |
326 | |
327 bool EncodedProgram::ReadFrom(SourceStreamSet* streams) { | |
328 // TODO(sra): read 64 bits. | |
329 uint32 temp; | |
330 if (!streams->stream(kStreamMisc)->ReadVarint32(&temp)) | |
331 return false; | |
332 image_base_ = temp; | |
333 | |
334 if (!ReadU32Delta(&abs32_rva_, streams->stream(kStreamAbs32Addresses))) | |
335 return false; | |
336 if (!ReadU32Delta(&rel32_rva_, streams->stream(kStreamRel32Addresses))) | |
337 return false; | |
338 if (!ReadVector(&origins_, streams->stream(kStreamOriginAddresses))) | |
339 return false; | |
340 if (!ReadVector(&ops_, streams->stream(kStreamOps))) | |
341 return false; | |
342 if (!ReadVector(©_counts_, streams->stream(kStreamCopyCounts))) | |
343 return false; | |
344 if (!ReadVectorU8(©_bytes_, streams->stream(kStreamBytes))) | |
345 return false; | |
346 if (!ReadVector(&abs32_ix_, streams->stream(kStreamAbs32Indexes))) | |
347 return false; | |
348 if (!ReadVector(&rel32_ix_, streams->stream(kStreamRel32Indexes))) | |
349 return false; | |
350 | |
351 // Check that streams have been completely consumed. | |
352 for (int i = 0; i < kStreamLimit; ++i) { | |
353 if (streams->stream(i)->Remaining() > 0) | |
354 return false; | |
355 } | |
356 | |
357 return true; | |
358 } | |
359 | |
360 // Safe, non-throwing version of std::vector::at(). Returns 'true' for success, | |
361 // 'false' for out-of-bounds index error. | |
362 template<typename T> | |
363 bool VectorAt(const std::vector<T>& v, size_t index, T* output) { | |
364 if (index >= v.size()) | |
365 return false; | |
366 *output = v[index]; | |
367 return true; | |
368 } | |
369 | |
370 bool EncodedProgram::AssembleTo(SinkStream* final_buffer) { | |
371 // For the most part, the assembly process walks the various tables. | |
372 // ix_mumble is the index into the mumble table. | |
373 size_t ix_origins = 0; | |
374 size_t ix_copy_counts = 0; | |
375 size_t ix_copy_bytes = 0; | |
376 size_t ix_abs32_ix = 0; | |
377 size_t ix_rel32_ix = 0; | |
378 | |
379 RVA current_rva = 0; | |
380 | |
381 bool pending_base_relocation_table = false; | |
382 SinkStream bytes_following_base_relocation_table; | |
383 | |
384 SinkStream* output = final_buffer; | |
385 | |
386 for (size_t ix_ops = 0; ix_ops < ops_.size(); ++ix_ops) { | |
387 OP op = ops_[ix_ops]; | |
388 | |
389 switch (op) { | |
390 default: | |
391 return false; | |
392 | |
393 case ORIGIN: { | |
394 RVA section_rva; | |
395 if (!VectorAt(origins_, ix_origins, §ion_rva)) | |
396 return false; | |
397 ++ix_origins; | |
398 current_rva = section_rva; | |
399 break; | |
400 } | |
401 | |
402 case COPY: { | |
403 int count; | |
404 if (!VectorAt(copy_counts_, ix_copy_counts, &count)) | |
405 return false; | |
406 ++ix_copy_counts; | |
407 for (int i = 0; i < count; ++i) { | |
408 uint8 b; | |
409 if (!VectorAt(copy_bytes_, ix_copy_bytes, &b)) | |
410 return false; | |
411 ++ix_copy_bytes; | |
412 output->Write(&b, 1); | |
413 } | |
414 current_rva += count; | |
415 break; | |
416 } | |
417 | |
418 case COPY1: { | |
419 uint8 b; | |
420 if (!VectorAt(copy_bytes_, ix_copy_bytes, &b)) | |
421 return false; | |
422 ++ix_copy_bytes; | |
423 output->Write(&b, 1); | |
424 current_rva += 1; | |
425 break; | |
426 } | |
427 | |
428 case REL32: { | |
429 uint32 index; | |
430 if (!VectorAt(rel32_ix_, ix_rel32_ix, &index)) | |
431 return false; | |
432 ++ix_rel32_ix; | |
433 RVA rva; | |
434 if (!VectorAt(rel32_rva_, index, &rva)) | |
435 return false; | |
436 uint32 offset = (rva - (current_rva + 4)); | |
437 output->Write(&offset, 4); | |
438 current_rva += 4; | |
439 break; | |
440 } | |
441 | |
442 case ABS32: { | |
443 uint32 index; | |
444 if (!VectorAt(abs32_ix_, ix_abs32_ix, &index)) | |
445 return false; | |
446 ++ix_abs32_ix; | |
447 RVA rva; | |
448 if (!VectorAt(abs32_rva_, index, &rva)) | |
449 return false; | |
450 uint32 abs32 = static_cast<uint32>(rva + image_base_); | |
451 abs32_relocs_.push_back(current_rva); | |
452 output->Write(&abs32, 4); | |
453 current_rva += 4; | |
454 break; | |
455 } | |
456 | |
457 case MAKE_BASE_RELOCATION_TABLE: { | |
458 // We can see the base relocation anywhere, but we only have the | |
459 // information to generate it at the very end. So we divert the bytes | |
460 // we are generating to a temporary stream. | |
461 if (pending_base_relocation_table) // Can't have two base relocation | |
462 // tables. | |
463 return false; | |
464 | |
465 pending_base_relocation_table = true; | |
466 output = &bytes_following_base_relocation_table; | |
467 break; | |
468 // There is a potential problem *if* the instruction stream contains | |
469 // some REL32 relocations following the base relocation and in the same | |
470 // section. We don't know the size of the table, so 'current_rva' will | |
471 // be wrong, causing REL32 offsets to be miscalculated. This never | |
472 // happens; the base relocation table is usually in a section of its | |
473 // own, a data-only section, and following everything else in the | |
474 // executable except some padding zero bytes. We could fix this by | |
475 // emitting an ORIGIN after the MAKE_BASE_RELOCATION_TABLE. | |
476 } | |
477 } | |
478 } | |
479 | |
480 if (pending_base_relocation_table) { | |
481 GenerateBaseRelocations(final_buffer); | |
482 final_buffer->Append(&bytes_following_base_relocation_table); | |
483 } | |
484 | |
485 // Final verification check: did we consume all lists? | |
486 if (ix_copy_counts != copy_counts_.size()) | |
487 return false; | |
488 if (ix_copy_bytes != copy_bytes_.size()) | |
489 return false; | |
490 if (ix_abs32_ix != abs32_ix_.size()) | |
491 return false; | |
492 if (ix_rel32_ix != rel32_ix_.size()) | |
493 return false; | |
494 | |
495 return true; | |
496 } | |
497 | |
498 | |
499 // RelocBlock has the layout of a block of relocations in the base relocation | |
500 // table file format. | |
501 // | |
502 class RelocBlock { | |
503 public: | |
504 uint32 page_rva; | |
505 uint32 block_size; | |
506 uint16 relocs[4096]; // Allow up to one relocation per byte of a 4k page. | |
507 | |
508 RelocBlock() : page_rva(~0), block_size(8) {} | |
509 | |
510 void Add(uint16 item) { | |
511 relocs[(block_size-8)/2] = item; | |
512 block_size += 2; | |
513 } | |
514 | |
515 void Flush(SinkStream* buffer) { | |
516 if (block_size != 8) { | |
517 if (block_size % 4 != 0) { // Pad to make size multiple of 4 bytes. | |
518 Add(0); | |
519 } | |
520 buffer->Write(this, block_size); | |
521 block_size = 8; | |
522 } | |
523 } | |
524 }; | |
525 | |
526 COMPILE_ASSERT(offsetof(RelocBlock, relocs) == 8, reloc_block_header_size); | |
527 | |
528 void EncodedProgram::GenerateBaseRelocations(SinkStream* buffer) { | |
529 std::sort(abs32_relocs_.begin(), abs32_relocs_.end()); | |
530 | |
531 RelocBlock block; | |
532 | |
533 for (size_t i = 0; i < abs32_relocs_.size(); ++i) { | |
534 uint32 rva = abs32_relocs_[i]; | |
535 uint32 page_rva = rva & ~0xFFF; | |
536 if (page_rva != block.page_rva) { | |
537 block.Flush(buffer); | |
538 block.page_rva = page_rva; | |
539 } | |
540 block.Add(0x3000 | (rva & 0xFFF)); | |
541 } | |
542 block.Flush(buffer); | |
543 } | |
544 | |
545 //////////////////////////////////////////////////////////////////////////////// | |
546 | |
547 Status WriteEncodedProgram(EncodedProgram* encoded, SinkStreamSet* sink) { | |
548 encoded->WriteTo(sink); | |
549 return C_OK; | |
550 } | |
551 | |
552 Status ReadEncodedProgram(SourceStreamSet* streams, EncodedProgram** output) { | |
553 EncodedProgram* encoded = new EncodedProgram(); | |
554 if (encoded->ReadFrom(streams)) { | |
555 *output = encoded; | |
556 return C_OK; | |
557 } | |
558 delete encoded; | |
559 return C_DESERIALIZATION_FAILED; | |
560 } | |
561 | |
562 Status Assemble(EncodedProgram* encoded, SinkStream* buffer) { | |
563 bool assembled = encoded->AssembleTo(buffer); | |
564 if (assembled) | |
565 return C_OK; | |
566 return C_ASSEMBLY_FAILED; | |
567 } | |
568 | |
569 void DeleteEncodedProgram(EncodedProgram* encoded) { | |
570 delete encoded; | |
571 } | |
572 | |
573 } // end namespace | |
OLD | NEW |