Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(171)

Side by Side Diff: courgette/encoded_program.cc

Issue 115062: Move Courgette... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « courgette/encoded_program.h ('k') | courgette/encoded_program_fuzz_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "courgette/encoded_program.h"
6
7 #include <algorithm>
8 #include <map>
9 #include <string>
10 #include <vector>
11
12 #include "base/logging.h"
13 #include "base/sys_info.h"
14
15 #include "courgette/courgette.h"
16 #include "courgette/streams.h"
17
18 namespace courgette {
19
20 // Stream indexes.
21 const int kStreamMisc = 0;
22 const int kStreamOps = 1;
23 const int kStreamBytes = 2;
24 const int kStreamAbs32Indexes = 3;
25 const int kStreamRel32Indexes = 4;
26 const int kStreamAbs32Addresses = 5;
27 const int kStreamRel32Addresses = 6;
28 const int kStreamCopyCounts = 7;
29 const int kStreamOriginAddresses = kStreamMisc;
30
31 const int kStreamLimit = 9;
32
33 // Binary assembly language operations.
34 enum EncodedProgram::OP {
35 ORIGIN, // ORIGIN <rva> - set address for subsequent assembly.
36 COPY, // COPY <count> <bytes> - copy bytes to output.
37 COPY1, // COPY1 <byte> - same as COPY 1 <byte>.
38 REL32, // REL32 <index> - emit rel32 encoded reference to address at
39 // address table offset <index>
40 ABS32, // ABS32 <index> - emit abs32 encoded reference to address at
41 // address table offset <index>
42 MAKE_BASE_RELOCATION_TABLE, // Emit base relocation table blocks.
43 OP_LAST
44 };
45
46
47 // Constructor is here rather than in the header. Although the constructor
48 // appears to do nothing it is fact quite large because of the implict calls to
49 // field constructors. Ditto for the destructor.
50 EncodedProgram::EncodedProgram() {}
51 EncodedProgram::~EncodedProgram() {}
52
53 // Serializes a vector of integral values using Varint32 coding.
54 template<typename T>
55 void WriteVector(const std::vector<T>& items, SinkStream* buffer) {
56 size_t count = items.size();
57 buffer->WriteVarint32(count);
58 for (size_t i = 0; i < count; ++i) {
59 COMPILE_ASSERT(sizeof(T) <= sizeof(uint32), T_must_fit_in_uint32);
60 buffer->WriteVarint32(static_cast<uint32>(items[i]));
61 }
62 }
63
64 template<typename T>
65 bool ReadVector(std::vector<T>* items, SourceStream* buffer) {
66 uint32 count;
67 if (!buffer->ReadVarint32(&count))
68 return false;
69
70 items->clear();
71 items->reserve(count);
72 for (size_t i = 0; i < count; ++i) {
73 uint32 item;
74 if (!buffer->ReadVarint32(&item))
75 return false;
76 items->push_back(static_cast<T>(item));
77 }
78
79 return true;
80 }
81
82 // Serializes a vector, using delta coding followed by Varint32 coding.
83 void WriteU32Delta(const std::vector<uint32>& set, SinkStream* buffer) {
84 size_t count = set.size();
85 buffer->WriteVarint32(count);
86 uint32 prev = 0;
87 for (size_t i = 0; i < count; ++i) {
88 uint32 current = set[i];
89 uint32 delta = current - prev;
90 buffer->WriteVarint32(delta);
91 prev = current;
92 }
93 }
94
95 static bool ReadU32Delta(std::vector<uint32>* set, SourceStream* buffer) {
96 uint32 count;
97
98 if (!buffer->ReadVarint32(&count))
99 return false;
100
101 set->clear();
102 set->reserve(count);
103 uint32 prev = 0;
104
105 for (size_t i = 0; i < count; ++i) {
106 uint32 delta;
107 if (!buffer->ReadVarint32(&delta))
108 return false;
109 uint32 current = prev + delta;
110 set->push_back(current);
111 prev = current;
112 }
113
114 return true;
115 }
116
117 // Write a vector as the byte representation of the contents.
118 //
119 // (This only really makes sense for a type T that has sizeof(T)==1, otherwise
120 // serilized representation is not endian-agnositic. But it is useful to keep
121 // the possibility of a greater size for experiments comparing Varint32 encoding
122 // of a vector of larger integrals vs a plain form.)
123 //
124 template<typename T>
125 void WriteVectorU8(const std::vector<T>& items, SinkStream* buffer) {
126 uint32 count = items.size();
127 buffer->WriteVarint32(count);
128 if (count != 0) {
129 size_t byte_count = count * sizeof(T);
130 buffer->Write(static_cast<const void*>(&items[0]), byte_count);
131 }
132 }
133
134 template<typename T>
135 bool ReadVectorU8(std::vector<T>* items, SourceStream* buffer) {
136 uint32 count;
137 if (!buffer->ReadVarint32(&count))
138 return false;
139
140 items->clear();
141 items->resize(count);
142 if (count != 0) {
143 size_t byte_count = count * sizeof(T);
144 return buffer->Read(static_cast<void*>(&((*items)[0])), byte_count);
145 }
146 return true;
147 }
148
149 ////////////////////////////////////////////////////////////////////////////////
150
151 void EncodedProgram::DefineRel32Label(int index, RVA value) {
152 DefineLabelCommon(&rel32_rva_, index, value);
153 }
154
155 void EncodedProgram::DefineAbs32Label(int index, RVA value) {
156 DefineLabelCommon(&abs32_rva_, index, value);
157 }
158
159 static const RVA kUnassignedRVA = static_cast<RVA>(-1);
160
161 void EncodedProgram::DefineLabelCommon(std::vector<RVA>* rvas,
162 int index,
163 RVA rva) {
164 if (static_cast<int>(rvas->size()) <= index) {
165 rvas->resize(index + 1, kUnassignedRVA);
166 }
167 if ((*rvas)[index] != kUnassignedRVA) {
168 NOTREACHED() << "DefineLabel double assigned " << index;
169 }
170 (*rvas)[index] = rva;
171 }
172
173 void EncodedProgram::EndLabels() {
174 FinishLabelsCommon(&abs32_rva_);
175 FinishLabelsCommon(&rel32_rva_);
176 }
177
178 void EncodedProgram::FinishLabelsCommon(std::vector<RVA>* rvas) {
179 // Replace all unassigned slots with the value at the previous index so they
180 // delta-encode to zero. (There might be better values than zero. The way to
181 // get that is have the higher level assembly program assign the unassigned
182 // slots.)
183 RVA previous = 0;
184 size_t size = rvas->size();
185 for (size_t i = 0; i < size; ++i) {
186 if ((*rvas)[i] == kUnassignedRVA)
187 (*rvas)[i] = previous;
188 else
189 previous = (*rvas)[i];
190 }
191 }
192
193 void EncodedProgram::AddOrigin(RVA origin) {
194 ops_.push_back(ORIGIN);
195 origins_.push_back(origin);
196 }
197
198 void EncodedProgram::AddCopy(int count, const void* bytes) {
199 const uint8* source = static_cast<const uint8*>(bytes);
200
201 // Fold adjacent COPY instructions into one. This nearly halves the size of
202 // an EncodedProgram with only COPY1 instructions since there are approx plain
203 // 16 bytes per reloc. This has a working-set benefit during decompression.
204 // For compression of files with large differences this makes a small (4%)
205 // improvement in size. For files with small differences this degrades the
206 // compressed size by 1.3%
207 if (ops_.size() > 0) {
208 if (ops_.back() == COPY1) {
209 ops_.back() = COPY;
210 copy_counts_.push_back(1);
211 }
212 if (ops_.back() == COPY) {
213 copy_counts_.back() += count;
214 for (int i = 0; i < count; ++i) {
215 copy_bytes_.push_back(source[i]);
216 }
217 return;
218 }
219 }
220
221 if (count == 1) {
222 ops_.push_back(COPY1);
223 copy_bytes_.push_back(source[0]);
224 } else {
225 ops_.push_back(COPY);
226 copy_counts_.push_back(count);
227 for (int i = 0; i < count; ++i) {
228 copy_bytes_.push_back(source[i]);
229 }
230 }
231 }
232
233 void EncodedProgram::AddAbs32(int label_index) {
234 ops_.push_back(ABS32);
235 abs32_ix_.push_back(label_index);
236 }
237
238 void EncodedProgram::AddRel32(int label_index) {
239 ops_.push_back(REL32);
240 rel32_ix_.push_back(label_index);
241 }
242
243 void EncodedProgram::AddMakeRelocs() {
244 ops_.push_back(MAKE_BASE_RELOCATION_TABLE);
245 }
246
247 void EncodedProgram::DebuggingSummary() {
248 LOG(INFO) << "EncodedProgram Summary";
249 LOG(INFO) << " image base " << image_base_;
250 LOG(INFO) << " abs32 rvas " << abs32_rva_.size();
251 LOG(INFO) << " rel32 rvas " << rel32_rva_.size();
252 LOG(INFO) << " ops " << ops_.size();
253 LOG(INFO) << " origins " << origins_.size();
254 LOG(INFO) << " copy_counts " << copy_counts_.size();
255 LOG(INFO) << " copy_bytes " << copy_bytes_.size();
256 LOG(INFO) << " abs32_ix " << abs32_ix_.size();
257 LOG(INFO) << " rel32_ix " << rel32_ix_.size();
258 }
259
260 ////////////////////////////////////////////////////////////////////////////////
261
262 // For algorithm refinement purposes it is useful to write subsets of the file
263 // format. This gives us the ability to estimate the entropy of the
264 // differential compression of the individual streams, which can provide
265 // invaluable insights. The default, of course, is to include all the streams.
266 //
267 enum FieldSelect {
268 INCLUDE_ABS32_ADDRESSES = 0x0001,
269 INCLUDE_REL32_ADDRESSES = 0x0002,
270 INCLUDE_ABS32_INDEXES = 0x0010,
271 INCLUDE_REL32_INDEXES = 0x0020,
272 INCLUDE_OPS = 0x0100,
273 INCLUDE_BYTES = 0x0200,
274 INCLUDE_COPY_COUNTS = 0x0400,
275 INCLUDE_MISC = 0x1000
276 };
277
278 static FieldSelect GetFieldSelect() {
279 #if 1
280 // TODO(sra): Use better configuration.
281 std::wstring s = base::SysInfo::GetEnvVar(L"A_FIELDS");
282 if (!s.empty()) {
283 return static_cast<FieldSelect>(wcstoul(s.c_str(), 0, 0));
284 }
285 #endif
286 return static_cast<FieldSelect>(~0);
287 }
288
289 void EncodedProgram::WriteTo(SinkStreamSet* streams) {
290 FieldSelect select = GetFieldSelect();
291
292 // The order of fields must be consistent in WriteTo and ReadFrom, regardless
293 // of the streams used. The code can be configured with all kStreamXXX
294 // constants the same.
295 //
296 // If we change the code to pipeline reading with assembly (to avoid temporary
297 // storage vectors by consuming operands directly from the stream) then we
298 // need to read the base address and the random access address tables first,
299 // the rest can be interleaved.
300
301 if (select & INCLUDE_MISC) {
302 // TODO(sra): write 64 bits.
303 streams->stream(kStreamMisc)->WriteVarint32(
304 static_cast<uint32>(image_base_));
305 }
306
307 if (select & INCLUDE_ABS32_ADDRESSES)
308 WriteU32Delta(abs32_rva_, streams->stream(kStreamAbs32Addresses));
309 if (select & INCLUDE_REL32_ADDRESSES)
310 WriteU32Delta(rel32_rva_, streams->stream(kStreamRel32Addresses));
311 if (select & INCLUDE_MISC)
312 WriteVector(origins_, streams->stream(kStreamOriginAddresses));
313 if (select & INCLUDE_OPS) {
314 streams->stream(kStreamOps)->Reserve(ops_.size() + 5); // 5 for length.
315 WriteVector(ops_, streams->stream(kStreamOps));
316 }
317 if (select & INCLUDE_COPY_COUNTS)
318 WriteVector(copy_counts_, streams->stream(kStreamCopyCounts));
319 if (select & INCLUDE_BYTES)
320 WriteVectorU8(copy_bytes_, streams->stream(kStreamBytes));
321 if (select & INCLUDE_ABS32_INDEXES)
322 WriteVector(abs32_ix_, streams->stream(kStreamAbs32Indexes));
323 if (select & INCLUDE_REL32_INDEXES)
324 WriteVector(rel32_ix_, streams->stream(kStreamRel32Indexes));
325 }
326
327 bool EncodedProgram::ReadFrom(SourceStreamSet* streams) {
328 // TODO(sra): read 64 bits.
329 uint32 temp;
330 if (!streams->stream(kStreamMisc)->ReadVarint32(&temp))
331 return false;
332 image_base_ = temp;
333
334 if (!ReadU32Delta(&abs32_rva_, streams->stream(kStreamAbs32Addresses)))
335 return false;
336 if (!ReadU32Delta(&rel32_rva_, streams->stream(kStreamRel32Addresses)))
337 return false;
338 if (!ReadVector(&origins_, streams->stream(kStreamOriginAddresses)))
339 return false;
340 if (!ReadVector(&ops_, streams->stream(kStreamOps)))
341 return false;
342 if (!ReadVector(&copy_counts_, streams->stream(kStreamCopyCounts)))
343 return false;
344 if (!ReadVectorU8(&copy_bytes_, streams->stream(kStreamBytes)))
345 return false;
346 if (!ReadVector(&abs32_ix_, streams->stream(kStreamAbs32Indexes)))
347 return false;
348 if (!ReadVector(&rel32_ix_, streams->stream(kStreamRel32Indexes)))
349 return false;
350
351 // Check that streams have been completely consumed.
352 for (int i = 0; i < kStreamLimit; ++i) {
353 if (streams->stream(i)->Remaining() > 0)
354 return false;
355 }
356
357 return true;
358 }
359
360 // Safe, non-throwing version of std::vector::at(). Returns 'true' for success,
361 // 'false' for out-of-bounds index error.
362 template<typename T>
363 bool VectorAt(const std::vector<T>& v, size_t index, T* output) {
364 if (index >= v.size())
365 return false;
366 *output = v[index];
367 return true;
368 }
369
370 bool EncodedProgram::AssembleTo(SinkStream* final_buffer) {
371 // For the most part, the assembly process walks the various tables.
372 // ix_mumble is the index into the mumble table.
373 size_t ix_origins = 0;
374 size_t ix_copy_counts = 0;
375 size_t ix_copy_bytes = 0;
376 size_t ix_abs32_ix = 0;
377 size_t ix_rel32_ix = 0;
378
379 RVA current_rva = 0;
380
381 bool pending_base_relocation_table = false;
382 SinkStream bytes_following_base_relocation_table;
383
384 SinkStream* output = final_buffer;
385
386 for (size_t ix_ops = 0; ix_ops < ops_.size(); ++ix_ops) {
387 OP op = ops_[ix_ops];
388
389 switch (op) {
390 default:
391 return false;
392
393 case ORIGIN: {
394 RVA section_rva;
395 if (!VectorAt(origins_, ix_origins, &section_rva))
396 return false;
397 ++ix_origins;
398 current_rva = section_rva;
399 break;
400 }
401
402 case COPY: {
403 int count;
404 if (!VectorAt(copy_counts_, ix_copy_counts, &count))
405 return false;
406 ++ix_copy_counts;
407 for (int i = 0; i < count; ++i) {
408 uint8 b;
409 if (!VectorAt(copy_bytes_, ix_copy_bytes, &b))
410 return false;
411 ++ix_copy_bytes;
412 output->Write(&b, 1);
413 }
414 current_rva += count;
415 break;
416 }
417
418 case COPY1: {
419 uint8 b;
420 if (!VectorAt(copy_bytes_, ix_copy_bytes, &b))
421 return false;
422 ++ix_copy_bytes;
423 output->Write(&b, 1);
424 current_rva += 1;
425 break;
426 }
427
428 case REL32: {
429 uint32 index;
430 if (!VectorAt(rel32_ix_, ix_rel32_ix, &index))
431 return false;
432 ++ix_rel32_ix;
433 RVA rva;
434 if (!VectorAt(rel32_rva_, index, &rva))
435 return false;
436 uint32 offset = (rva - (current_rva + 4));
437 output->Write(&offset, 4);
438 current_rva += 4;
439 break;
440 }
441
442 case ABS32: {
443 uint32 index;
444 if (!VectorAt(abs32_ix_, ix_abs32_ix, &index))
445 return false;
446 ++ix_abs32_ix;
447 RVA rva;
448 if (!VectorAt(abs32_rva_, index, &rva))
449 return false;
450 uint32 abs32 = static_cast<uint32>(rva + image_base_);
451 abs32_relocs_.push_back(current_rva);
452 output->Write(&abs32, 4);
453 current_rva += 4;
454 break;
455 }
456
457 case MAKE_BASE_RELOCATION_TABLE: {
458 // We can see the base relocation anywhere, but we only have the
459 // information to generate it at the very end. So we divert the bytes
460 // we are generating to a temporary stream.
461 if (pending_base_relocation_table) // Can't have two base relocation
462 // tables.
463 return false;
464
465 pending_base_relocation_table = true;
466 output = &bytes_following_base_relocation_table;
467 break;
468 // There is a potential problem *if* the instruction stream contains
469 // some REL32 relocations following the base relocation and in the same
470 // section. We don't know the size of the table, so 'current_rva' will
471 // be wrong, causing REL32 offsets to be miscalculated. This never
472 // happens; the base relocation table is usually in a section of its
473 // own, a data-only section, and following everything else in the
474 // executable except some padding zero bytes. We could fix this by
475 // emitting an ORIGIN after the MAKE_BASE_RELOCATION_TABLE.
476 }
477 }
478 }
479
480 if (pending_base_relocation_table) {
481 GenerateBaseRelocations(final_buffer);
482 final_buffer->Append(&bytes_following_base_relocation_table);
483 }
484
485 // Final verification check: did we consume all lists?
486 if (ix_copy_counts != copy_counts_.size())
487 return false;
488 if (ix_copy_bytes != copy_bytes_.size())
489 return false;
490 if (ix_abs32_ix != abs32_ix_.size())
491 return false;
492 if (ix_rel32_ix != rel32_ix_.size())
493 return false;
494
495 return true;
496 }
497
498
499 // RelocBlock has the layout of a block of relocations in the base relocation
500 // table file format.
501 //
502 class RelocBlock {
503 public:
504 uint32 page_rva;
505 uint32 block_size;
506 uint16 relocs[4096]; // Allow up to one relocation per byte of a 4k page.
507
508 RelocBlock() : page_rva(~0), block_size(8) {}
509
510 void Add(uint16 item) {
511 relocs[(block_size-8)/2] = item;
512 block_size += 2;
513 }
514
515 void Flush(SinkStream* buffer) {
516 if (block_size != 8) {
517 if (block_size % 4 != 0) { // Pad to make size multiple of 4 bytes.
518 Add(0);
519 }
520 buffer->Write(this, block_size);
521 block_size = 8;
522 }
523 }
524 };
525
526 COMPILE_ASSERT(offsetof(RelocBlock, relocs) == 8, reloc_block_header_size);
527
528 void EncodedProgram::GenerateBaseRelocations(SinkStream* buffer) {
529 std::sort(abs32_relocs_.begin(), abs32_relocs_.end());
530
531 RelocBlock block;
532
533 for (size_t i = 0; i < abs32_relocs_.size(); ++i) {
534 uint32 rva = abs32_relocs_[i];
535 uint32 page_rva = rva & ~0xFFF;
536 if (page_rva != block.page_rva) {
537 block.Flush(buffer);
538 block.page_rva = page_rva;
539 }
540 block.Add(0x3000 | (rva & 0xFFF));
541 }
542 block.Flush(buffer);
543 }
544
545 ////////////////////////////////////////////////////////////////////////////////
546
547 Status WriteEncodedProgram(EncodedProgram* encoded, SinkStreamSet* sink) {
548 encoded->WriteTo(sink);
549 return C_OK;
550 }
551
552 Status ReadEncodedProgram(SourceStreamSet* streams, EncodedProgram** output) {
553 EncodedProgram* encoded = new EncodedProgram();
554 if (encoded->ReadFrom(streams)) {
555 *output = encoded;
556 return C_OK;
557 }
558 delete encoded;
559 return C_DESERIALIZATION_FAILED;
560 }
561
562 Status Assemble(EncodedProgram* encoded, SinkStream* buffer) {
563 bool assembled = encoded->AssembleTo(buffer);
564 if (assembled)
565 return C_OK;
566 return C_ASSEMBLY_FAILED;
567 }
568
569 void DeleteEncodedProgram(EncodedProgram* encoded) {
570 delete encoded;
571 }
572
573 } // end namespace
OLDNEW
« no previous file with comments | « courgette/encoded_program.h ('k') | courgette/encoded_program_fuzz_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698