third_party/courgette/encoded_program.cc - Issue 115062: Move Courgette...

Side by Side Diff: third_party/courgette/encoded_program.cc

Issue 115062: Move Courgette... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "third_party/courgette/encoded_program.h"

6

7 #include <algorithm>

8 #include <map>

9 #include <string>

10 #include <vector>

11

12 #include "base/logging.h"

13 #include "base/sys_info.h"

14

15 #include "third_party/courgette/courgette.h"

16 #include "third_party/courgette/streams.h"

17

18 namespace courgette {

19

20 // Stream indexes.

21 const int kStreamMisc = 0;

22 const int kStreamOps = 1;

23 const int kStreamBytes = 2;

24 const int kStreamAbs32Indexes = 3;

25 const int kStreamRel32Indexes = 4;

26 const int kStreamAbs32Addresses = 5;

27 const int kStreamRel32Addresses = 6;

28 const int kStreamCopyCounts = 7;

29 const int kStreamOriginAddresses = kStreamMisc;

30

31 const int kStreamLimit = 9;

32

33 // Binary assembly language operations.

34 enum EncodedProgram::OP {

35 ORIGIN, // ORIGIN <rva> - set address for subsequent assembly.

36 COPY, // COPY <count> <bytes> - copy bytes to output.

37 COPY1, // COPY1 <byte> - same as COPY 1 <byte>.

38 REL32, // REL32 <index> - emit rel32 encoded reference to address at

39 // address table offset <index>

40 ABS32, // ABS32 <index> - emit abs32 encoded reference to address at

41 // address table offset <index>

42 MAKE_BASE_RELOCATION_TABLE, // Emit base relocation table blocks.

43 OP_LAST

44 };

45

46

47 // Constructor is here rather than in the header. Although the constructor

48 // appears to do nothing it is fact quite large because of the implict calls to

49 // field constructors. Ditto for the destructor.

50 EncodedProgram::EncodedProgram() {}

51 EncodedProgram::~EncodedProgram() {}

52

53 // Serializes a vector of integral values using Varint32 coding.

54 template<typename T>

55 void WriteVector(const std::vector<T>& items, SinkStream* buffer) {

56 size_t count = items.size();

57 buffer->WriteVarint32(count);

58 for (size_t i = 0; i < count; ++i) {

59 COMPILE_ASSERT(sizeof(T) <= sizeof(uint32), T_must_fit_in_uint32);

60 buffer->WriteVarint32(static_cast<uint32>(items[i]));

61 }

62 }

63

64 template<typename T>

65 bool ReadVector(std::vector<T>* items, SourceStream* buffer) {

66 uint32 count;

67 if (!buffer->ReadVarint32(&count))

68 return false;

69

70 items->clear();

71 items->reserve(count);

72 for (size_t i = 0; i < count; ++i) {

73 uint32 item;

74 if (!buffer->ReadVarint32(&item))

75 return false;

76 items->push_back(static_cast<T>(item));

77 }

78

79 return true;

80 }

81

82 // Serializes a vector, using delta coding followed by Varint32 coding.

83 void WriteU32Delta(const std::vector<uint32>& set, SinkStream* buffer) {

84 size_t count = set.size();

85 buffer->WriteVarint32(count);

86 uint32 prev = 0;

87 for (size_t i = 0; i < count; ++i) {

88 uint32 current = set[i];

89 uint32 delta = current - prev;

90 buffer->WriteVarint32(delta);

91 prev = current;

92 }

93 }

94

95 static bool ReadU32Delta(std::vector<uint32>* set, SourceStream* buffer) {

96 uint32 count;

97

98 if (!buffer->ReadVarint32(&count))

99 return false;

100

101 set->clear();

102 set->reserve(count);

103 uint32 prev = 0;

104

105 for (size_t i = 0; i < count; ++i) {

106 uint32 delta;

107 if (!buffer->ReadVarint32(&delta))

108 return false;

109 uint32 current = prev + delta;

110 set->push_back(current);

111 prev = current;

112 }

113

114 return true;

115 }

116

117 // Write a vector as the byte representation of the contents.

118 //

119 // (This only really makes sense for a type T that has sizeof(T)==1, otherwise

120 // serilized representation is not endian-agnositic. But it is useful to keep

121 // the possibility of a greater size for experiments comparing Varint32 encoding

122 // of a vector of larger integrals vs a plain form.)

123 //

124 template<typename T>

125 void WriteVectorU8(const std::vector<T>& items, SinkStream* buffer) {

126 uint32 count = items.size();

127 buffer->WriteVarint32(count);

128 if (count != 0) {

129 size_t byte_count = count * sizeof(T);

130 buffer->Write(static_cast<const void*>(&items[0]), byte_count);

131 }

132 }

133

134 template<typename T>

135 bool ReadVectorU8(std::vector<T>* items, SourceStream* buffer) {

136 uint32 count;

137 if (!buffer->ReadVarint32(&count))

138 return false;

139

140 items->clear();

141 items->resize(count);

142 if (count != 0) {

143 size_t byte_count = count * sizeof(T);

144 return buffer->Read(static_cast<void>(&((items)[0])), byte_count);

145 }

146 return true;

147 }

148

149 ////////////////////////////////////////////////////////////////////////////////

150

151 void EncodedProgram::DefineRel32Label(int index, RVA value) {

152 DefineLabelCommon(&rel32_rva_, index, value);

153 }

154

155 void EncodedProgram::DefineAbs32Label(int index, RVA value) {

156 DefineLabelCommon(&abs32_rva_, index, value);

157 }

158

159 static const RVA kUnassignedRVA = static_cast<RVA>(-1);

160

161 void EncodedProgram::DefineLabelCommon(std::vector<RVA>* rvas,

162 int index,

163 RVA rva) {

164 if (static_cast<int>(rvas->size()) <= index) {

165 rvas->resize(index + 1, kUnassignedRVA);

166 }

167 if ((*rvas)[index] != kUnassignedRVA) {

168 NOTREACHED() << "DefineLabel double assigned " << index;

169 }

170 (*rvas)[index] = rva;

171 }

172

173 void EncodedProgram::EndLabels() {

174 FinishLabelsCommon(&abs32_rva_);

175 FinishLabelsCommon(&rel32_rva_);

176 }

177

178 void EncodedProgram::FinishLabelsCommon(std::vector<RVA>* rvas) {

179 // Replace all unassigned slots with the value at the previous index so they

180 // delta-encode to zero. (There might be better values than zero. The way to

181 // get that is have the higher level assembly program assign the unassigned

182 // slots.)

183 RVA previous = 0;

184 size_t size = rvas->size();

185 for (size_t i = 0; i < size; ++i) {

186 if ((*rvas)[i] == kUnassignedRVA)

187 (*rvas)[i] = previous;

188 else

189 previous = (*rvas)[i];

190 }

191 }

192

193 void EncodedProgram::AddOrigin(RVA origin) {

194 ops_.push_back(ORIGIN);

195 origins_.push_back(origin);

196 }

197

198 void EncodedProgram::AddCopy(int count, const void* bytes) {

199 const uint8* source = static_cast<const uint8*>(bytes);

200

201 // Fold adjacent COPY instructions into one. This nearly halves the size of

202 // an EncodedProgram with only COPY1 instructions since there are approx plain

203 // 16 bytes per reloc. This has a working-set benefit during decompression.

204 // For compression of files with large differences this makes a small (4%)

205 // improvement in size. For files with small differences this degrades the

206 // compressed size by 1.3%

207 if (ops_.size() > 0) {

208 if (ops_.back() == COPY1) {

209 ops_.back() = COPY;

210 copy_counts_.push_back(1);

211 }

212 if (ops_.back() == COPY) {

213 copy_counts_.back() += count;

214 for (int i = 0; i < count; ++i) {

215 copy_bytes_.push_back(source[i]);

216 }

217 return;

218 }

219 }

220

221 if (count == 1) {

222 ops_.push_back(COPY1);

223 copy_bytes_.push_back(source[0]);

224 } else {

225 ops_.push_back(COPY);

226 copy_counts_.push_back(count);

227 for (int i = 0; i < count; ++i) {

228 copy_bytes_.push_back(source[i]);

229 }

230 }

231 }

232

233 void EncodedProgram::AddAbs32(int label_index) {

234 ops_.push_back(ABS32);

235 abs32_ix_.push_back(label_index);

236 }

237

238 void EncodedProgram::AddRel32(int label_index) {

239 ops_.push_back(REL32);

240 rel32_ix_.push_back(label_index);

241 }

242

243 void EncodedProgram::AddMakeRelocs() {

244 ops_.push_back(MAKE_BASE_RELOCATION_TABLE);

245 }

246

247 void EncodedProgram::DebuggingSummary() {

248 LOG(INFO) << "EncodedProgram Summary";

249 LOG(INFO) << " image base " << image_base_;

250 LOG(INFO) << " abs32 rvas " << abs32_rva_.size();

251 LOG(INFO) << " rel32 rvas " << rel32_rva_.size();

252 LOG(INFO) << " ops " << ops_.size();

253 LOG(INFO) << " origins " << origins_.size();

254 LOG(INFO) << " copy_counts " << copy_counts_.size();

255 LOG(INFO) << " copy_bytes " << copy_bytes_.size();

256 LOG(INFO) << " abs32_ix " << abs32_ix_.size();

257 LOG(INFO) << " rel32_ix " << rel32_ix_.size();

258 }

259

260 ////////////////////////////////////////////////////////////////////////////////

261

262 // For algorithm refinement purposes it is useful to write subsets of the file

263 // format. This gives us the ability to estimate the entropy of the

264 // differential compression of the individual streams, which can provide

265 // invaluable insights. The default, of course, is to include all the streams.

266 //

267 enum FieldSelect {

268 INCLUDE_ABS32_ADDRESSES = 0x0001,

269 INCLUDE_REL32_ADDRESSES = 0x0002,

270 INCLUDE_ABS32_INDEXES = 0x0010,

271 INCLUDE_REL32_INDEXES = 0x0020,

272 INCLUDE_OPS = 0x0100,

273 INCLUDE_BYTES = 0x0200,

274 INCLUDE_COPY_COUNTS = 0x0400,

275 INCLUDE_MISC = 0x1000

276 };

277

278 static FieldSelect GetFieldSelect() {

279 #if 1

280 // TODO(sra): Use better configuration.

281 std::wstring s = base::SysInfo::GetEnvVar(L"A_FIELDS");

282 if (!s.empty()) {

283 return static_cast<FieldSelect>(wcstoul(s.c_str(), 0, 0));

284 }

285 #endif

286 return static_cast<FieldSelect>(~0);

287 }

288

289 void EncodedProgram::WriteTo(SinkStreamSet* streams) {

290 FieldSelect select = GetFieldSelect();

291

292 // The order of fields must be consistent in WriteTo and ReadFrom, regardless

293 // of the streams used. The code can be configured with all kStreamXXX

294 // constants the same.

295 //

296 // If we change the code to pipeline reading with assembly (to avoid temporary

297 // storage vectors by consuming operands directly from the stream) then we

298 // need to read the base address and the random access address tables first,

299 // the rest can be interleaved.

300

301 if (select & INCLUDE_MISC) {

302 // TODO(sra): write 64 bits.

303 streams->stream(kStreamMisc)->WriteVarint32(

304 static_cast<uint32>(image_base_));

305 }

306

307 if (select & INCLUDE_ABS32_ADDRESSES)

308 WriteU32Delta(abs32_rva_, streams->stream(kStreamAbs32Addresses));

309 if (select & INCLUDE_REL32_ADDRESSES)

310 WriteU32Delta(rel32_rva_, streams->stream(kStreamRel32Addresses));

311 if (select & INCLUDE_MISC)

312 WriteVector(origins_, streams->stream(kStreamOriginAddresses));

313 if (select & INCLUDE_OPS) {

314 streams->stream(kStreamOps)->Reserve(ops_.size() + 5); // 5 for length.

315 WriteVector(ops_, streams->stream(kStreamOps));

316 }

317 if (select & INCLUDE_COPY_COUNTS)

318 WriteVector(copy_counts_, streams->stream(kStreamCopyCounts));

319 if (select & INCLUDE_BYTES)

320 WriteVectorU8(copy_bytes_, streams->stream(kStreamBytes));

321 if (select & INCLUDE_ABS32_INDEXES)

322 WriteVector(abs32_ix_, streams->stream(kStreamAbs32Indexes));

323 if (select & INCLUDE_REL32_INDEXES)

324 WriteVector(rel32_ix_, streams->stream(kStreamRel32Indexes));

325 }

326

327 bool EncodedProgram::ReadFrom(SourceStreamSet* streams) {

328 // TODO(sra): read 64 bits.

329 uint32 temp;

330 if (!streams->stream(kStreamMisc)->ReadVarint32(&temp))

331 return false;

332 image_base_ = temp;

333

334 if (!ReadU32Delta(&abs32_rva_, streams->stream(kStreamAbs32Addresses)))

335 return false;

336 if (!ReadU32Delta(&rel32_rva_, streams->stream(kStreamRel32Addresses)))

337 return false;

338 if (!ReadVector(&origins_, streams->stream(kStreamOriginAddresses)))

339 return false;

340 if (!ReadVector(&ops_, streams->stream(kStreamOps)))

341 return false;

342 if (!ReadVector(&copy_counts_, streams->stream(kStreamCopyCounts)))

343 return false;

344 if (!ReadVectorU8(&copy_bytes_, streams->stream(kStreamBytes)))

345 return false;

346 if (!ReadVector(&abs32_ix_, streams->stream(kStreamAbs32Indexes)))

347 return false;

348 if (!ReadVector(&rel32_ix_, streams->stream(kStreamRel32Indexes)))

349 return false;

350

351 // Check that streams have been completely consumed.

352 for (int i = 0; i < kStreamLimit; ++i) {

353 if (streams->stream(i)->Remaining() > 0)

354 return false;

355 }

356

357 return true;

358 }

359

360 // Safe, non-throwing version of std::vector::at(). Returns 'true' for success,

361 // 'false' for out-of-bounds index error.

362 template<typename T>

363 bool VectorAt(const std::vector<T>& v, size_t index, T* output) {

364 if (index >= v.size())

365 return false;

366 *output = v[index];

367 return true;

368 }

369

370 bool EncodedProgram::AssembleTo(SinkStream* final_buffer) {

371 // For the most part, the assembly process walks the various tables.

372 // ix_mumble is the index into the mumble table.

373 size_t ix_origins = 0;

374 size_t ix_copy_counts = 0;

375 size_t ix_copy_bytes = 0;

376 size_t ix_abs32_ix = 0;

377 size_t ix_rel32_ix = 0;

378

379 RVA current_rva = 0;

380

381 bool pending_base_relocation_table = false;

382 SinkStream bytes_following_base_relocation_table;

383

384 SinkStream* output = final_buffer;

385

386 for (size_t ix_ops = 0; ix_ops < ops_.size(); ++ix_ops) {

387 OP op = ops_[ix_ops];

388

389 switch (op) {

390 default:

391 return false;

392

393 case ORIGIN: {

394 RVA section_rva;

395 if (!VectorAt(origins_, ix_origins, &section_rva))

396 return false;

397 ++ix_origins;

398 current_rva = section_rva;

399 break;

400 }

401

402 case COPY: {

403 int count;

404 if (!VectorAt(copy_counts_, ix_copy_counts, &count))

405 return false;

406 ++ix_copy_counts;

407 for (int i = 0; i < count; ++i) {

408 uint8 b;

409 if (!VectorAt(copy_bytes_, ix_copy_bytes, &b))

410 return false;

411 ++ix_copy_bytes;

412 output->Write(&b, 1);

413 }

414 current_rva += count;

415 break;

416 }

417

418 case COPY1: {

419 uint8 b;

420 if (!VectorAt(copy_bytes_, ix_copy_bytes, &b))

421 return false;

422 ++ix_copy_bytes;

423 output->Write(&b, 1);

424 current_rva += 1;

425 break;

426 }

427

428 case REL32: {

429 uint32 index;

430 if (!VectorAt(rel32_ix_, ix_rel32_ix, &index))

431 return false;

432 ++ix_rel32_ix;

433 RVA rva;

434 if (!VectorAt(rel32_rva_, index, &rva))

435 return false;

436 uint32 offset = (rva - (current_rva + 4));

437 output->Write(&offset, 4);

438 current_rva += 4;

439 break;

440 }

441

442 case ABS32: {

443 uint32 index;

444 if (!VectorAt(abs32_ix_, ix_abs32_ix, &index))

445 return false;

446 ++ix_abs32_ix;

447 RVA rva;

448 if (!VectorAt(abs32_rva_, index, &rva))

449 return false;

450 uint32 abs32 = static_cast<uint32>(rva + image_base_);

451 abs32_relocs_.push_back(current_rva);

452 output->Write(&abs32, 4);

453 current_rva += 4;

454 break;

455 }

456

457 case MAKE_BASE_RELOCATION_TABLE: {

458 // We can see the base relocation anywhere, but we only have the

459 // information to generate it at the very end. So we divert the bytes

460 // we are generating to a temporary stream.

461 if (pending_base_relocation_table) // Can't have two base relocation

462 // tables.

463 return false;

464

465 pending_base_relocation_table = true;

466 output = &bytes_following_base_relocation_table;

467 break;

468 // There is a potential problem if the instruction stream contains

469 // some REL32 relocations following the base relocation and in the same

470 // section. We don't know the size of the table, so 'current_rva' will

471 // be wrong, causing REL32 offsets to be miscalculated. This never

472 // happens; the base relocation table is usually in a section of its

473 // own, a data-only section, and following everything else in the

474 // executable except some padding zero bytes. We could fix this by

475 // emitting an ORIGIN after the MAKE_BASE_RELOCATION_TABLE.

476 }

477 }

478 }

479

480 if (pending_base_relocation_table) {

481 GenerateBaseRelocations(final_buffer);

482 final_buffer->Append(&bytes_following_base_relocation_table);

483 }

484

485 // Final verification check: did we consume all lists?

486 if (ix_copy_counts != copy_counts_.size())

487 return false;

488 if (ix_copy_bytes != copy_bytes_.size())

489 return false;

490 if (ix_abs32_ix != abs32_ix_.size())

491 return false;

492 if (ix_rel32_ix != rel32_ix_.size())

493 return false;

494

495 return true;

496 }

497

498

499 // RelocBlock has the layout of a block of relocations in the base relocation

500 // table file format.

501 //

502 class RelocBlock {

503 public:

504 uint32 page_rva;

505 uint32 block_size;

506 uint16 relocs[4096]; // Allow up to one relocation per byte of a 4k page.

507

508 RelocBlock() : page_rva(~0), block_size(8) {}

509

510 void Add(uint16 item) {

511 relocs[(block_size-8)/2] = item;

512 block_size += 2;

513 }

514

515 void Flush(SinkStream* buffer) {

516 if (block_size != 8) {

517 if (block_size % 4 != 0) { // Pad to make size multiple of 4 bytes.

518 Add(0);

519 }

520 buffer->Write(this, block_size);

521 block_size = 8;

522 }

523 }

524 };

525

526 COMPILE_ASSERT(offsetof(RelocBlock, relocs) == 8, reloc_block_header_size);

527

528 void EncodedProgram::GenerateBaseRelocations(SinkStream* buffer) {

529 std::sort(abs32_relocs_.begin(), abs32_relocs_.end());

530

531 RelocBlock block;

532

533 for (size_t i = 0; i < abs32_relocs_.size(); ++i) {

534 uint32 rva = abs32_relocs_[i];

535 uint32 page_rva = rva & ~0xFFF;

536 if (page_rva != block.page_rva) {

537 block.Flush(buffer);

538 block.page_rva = page_rva;

539 }

540 block.Add(0x3000 \| (rva & 0xFFF));

541 }

542 block.Flush(buffer);

543 }

544

545 ////////////////////////////////////////////////////////////////////////////////

546

547 Status WriteEncodedProgram(EncodedProgram* encoded, SinkStreamSet* sink) {

548 encoded->WriteTo(sink);

549 return C_OK;

550 }

551

552 Status ReadEncodedProgram(SourceStreamSet* streams, EncodedProgram** output) {

553 EncodedProgram* encoded = new EncodedProgram();

554 if (encoded->ReadFrom(streams)) {

555 *output = encoded;

556 return C_OK;

557 }

558 delete encoded;

559 return C_DESERIALIZATION_FAILED;

560 }

561

562 Status Assemble(EncodedProgram* encoded, SinkStream* buffer) {

563 bool assembled = encoded->AssembleTo(buffer);

564 if (assembled)

565 return C_OK;

566 return C_ASSEMBLY_FAILED;

567 }

568

569 void DeleteEncodedProgram(EncodedProgram* encoded) {

570 delete encoded;

571 }

572

573 } // end namespace

OLD	NEW

« no previous file with comments | « third_party/courgette/encoded_program.h ('k') | third_party/courgette/encoded_program_fuzz_unittest.cc » ('j') | no next file with comments »