Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1210)

Unified Diff: third_party/courgette/encoded_program.cc

Issue 115062: Move Courgette... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/courgette/encoded_program.h ('k') | third_party/courgette/encoded_program_fuzz_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/courgette/encoded_program.cc
===================================================================
--- third_party/courgette/encoded_program.cc (revision 15668)
+++ third_party/courgette/encoded_program.cc (working copy)
@@ -1,573 +0,0 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "third_party/courgette/encoded_program.h"
-
-#include <algorithm>
-#include <map>
-#include <string>
-#include <vector>
-
-#include "base/logging.h"
-#include "base/sys_info.h"
-
-#include "third_party/courgette/courgette.h"
-#include "third_party/courgette/streams.h"
-
-namespace courgette {
-
-// Stream indexes.
-const int kStreamMisc = 0;
-const int kStreamOps = 1;
-const int kStreamBytes = 2;
-const int kStreamAbs32Indexes = 3;
-const int kStreamRel32Indexes = 4;
-const int kStreamAbs32Addresses = 5;
-const int kStreamRel32Addresses = 6;
-const int kStreamCopyCounts = 7;
-const int kStreamOriginAddresses = kStreamMisc;
-
-const int kStreamLimit = 9;
-
-// Binary assembly language operations.
-enum EncodedProgram::OP {
- ORIGIN, // ORIGIN <rva> - set address for subsequent assembly.
- COPY, // COPY <count> <bytes> - copy bytes to output.
- COPY1, // COPY1 <byte> - same as COPY 1 <byte>.
- REL32, // REL32 <index> - emit rel32 encoded reference to address at
- // address table offset <index>
- ABS32, // ABS32 <index> - emit abs32 encoded reference to address at
- // address table offset <index>
- MAKE_BASE_RELOCATION_TABLE, // Emit base relocation table blocks.
- OP_LAST
-};
-
-
-// Constructor is here rather than in the header. Although the constructor
-// appears to do nothing it is fact quite large because of the implict calls to
-// field constructors. Ditto for the destructor.
-EncodedProgram::EncodedProgram() {}
-EncodedProgram::~EncodedProgram() {}
-
-// Serializes a vector of integral values using Varint32 coding.
-template<typename T>
-void WriteVector(const std::vector<T>& items, SinkStream* buffer) {
- size_t count = items.size();
- buffer->WriteVarint32(count);
- for (size_t i = 0; i < count; ++i) {
- COMPILE_ASSERT(sizeof(T) <= sizeof(uint32), T_must_fit_in_uint32);
- buffer->WriteVarint32(static_cast<uint32>(items[i]));
- }
-}
-
-template<typename T>
-bool ReadVector(std::vector<T>* items, SourceStream* buffer) {
- uint32 count;
- if (!buffer->ReadVarint32(&count))
- return false;
-
- items->clear();
- items->reserve(count);
- for (size_t i = 0; i < count; ++i) {
- uint32 item;
- if (!buffer->ReadVarint32(&item))
- return false;
- items->push_back(static_cast<T>(item));
- }
-
- return true;
-}
-
-// Serializes a vector, using delta coding followed by Varint32 coding.
-void WriteU32Delta(const std::vector<uint32>& set, SinkStream* buffer) {
- size_t count = set.size();
- buffer->WriteVarint32(count);
- uint32 prev = 0;
- for (size_t i = 0; i < count; ++i) {
- uint32 current = set[i];
- uint32 delta = current - prev;
- buffer->WriteVarint32(delta);
- prev = current;
- }
-}
-
-static bool ReadU32Delta(std::vector<uint32>* set, SourceStream* buffer) {
- uint32 count;
-
- if (!buffer->ReadVarint32(&count))
- return false;
-
- set->clear();
- set->reserve(count);
- uint32 prev = 0;
-
- for (size_t i = 0; i < count; ++i) {
- uint32 delta;
- if (!buffer->ReadVarint32(&delta))
- return false;
- uint32 current = prev + delta;
- set->push_back(current);
- prev = current;
- }
-
- return true;
-}
-
-// Write a vector as the byte representation of the contents.
-//
-// (This only really makes sense for a type T that has sizeof(T)==1, otherwise
-// serilized representation is not endian-agnositic. But it is useful to keep
-// the possibility of a greater size for experiments comparing Varint32 encoding
-// of a vector of larger integrals vs a plain form.)
-//
-template<typename T>
-void WriteVectorU8(const std::vector<T>& items, SinkStream* buffer) {
- uint32 count = items.size();
- buffer->WriteVarint32(count);
- if (count != 0) {
- size_t byte_count = count * sizeof(T);
- buffer->Write(static_cast<const void*>(&items[0]), byte_count);
- }
-}
-
-template<typename T>
-bool ReadVectorU8(std::vector<T>* items, SourceStream* buffer) {
- uint32 count;
- if (!buffer->ReadVarint32(&count))
- return false;
-
- items->clear();
- items->resize(count);
- if (count != 0) {
- size_t byte_count = count * sizeof(T);
- return buffer->Read(static_cast<void*>(&((*items)[0])), byte_count);
- }
- return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-void EncodedProgram::DefineRel32Label(int index, RVA value) {
- DefineLabelCommon(&rel32_rva_, index, value);
-}
-
-void EncodedProgram::DefineAbs32Label(int index, RVA value) {
- DefineLabelCommon(&abs32_rva_, index, value);
-}
-
-static const RVA kUnassignedRVA = static_cast<RVA>(-1);
-
-void EncodedProgram::DefineLabelCommon(std::vector<RVA>* rvas,
- int index,
- RVA rva) {
- if (static_cast<int>(rvas->size()) <= index) {
- rvas->resize(index + 1, kUnassignedRVA);
- }
- if ((*rvas)[index] != kUnassignedRVA) {
- NOTREACHED() << "DefineLabel double assigned " << index;
- }
- (*rvas)[index] = rva;
-}
-
-void EncodedProgram::EndLabels() {
- FinishLabelsCommon(&abs32_rva_);
- FinishLabelsCommon(&rel32_rva_);
-}
-
-void EncodedProgram::FinishLabelsCommon(std::vector<RVA>* rvas) {
- // Replace all unassigned slots with the value at the previous index so they
- // delta-encode to zero. (There might be better values than zero. The way to
- // get that is have the higher level assembly program assign the unassigned
- // slots.)
- RVA previous = 0;
- size_t size = rvas->size();
- for (size_t i = 0; i < size; ++i) {
- if ((*rvas)[i] == kUnassignedRVA)
- (*rvas)[i] = previous;
- else
- previous = (*rvas)[i];
- }
-}
-
-void EncodedProgram::AddOrigin(RVA origin) {
- ops_.push_back(ORIGIN);
- origins_.push_back(origin);
-}
-
-void EncodedProgram::AddCopy(int count, const void* bytes) {
- const uint8* source = static_cast<const uint8*>(bytes);
-
- // Fold adjacent COPY instructions into one. This nearly halves the size of
- // an EncodedProgram with only COPY1 instructions since there are approx plain
- // 16 bytes per reloc. This has a working-set benefit during decompression.
- // For compression of files with large differences this makes a small (4%)
- // improvement in size. For files with small differences this degrades the
- // compressed size by 1.3%
- if (ops_.size() > 0) {
- if (ops_.back() == COPY1) {
- ops_.back() = COPY;
- copy_counts_.push_back(1);
- }
- if (ops_.back() == COPY) {
- copy_counts_.back() += count;
- for (int i = 0; i < count; ++i) {
- copy_bytes_.push_back(source[i]);
- }
- return;
- }
- }
-
- if (count == 1) {
- ops_.push_back(COPY1);
- copy_bytes_.push_back(source[0]);
- } else {
- ops_.push_back(COPY);
- copy_counts_.push_back(count);
- for (int i = 0; i < count; ++i) {
- copy_bytes_.push_back(source[i]);
- }
- }
-}
-
-void EncodedProgram::AddAbs32(int label_index) {
- ops_.push_back(ABS32);
- abs32_ix_.push_back(label_index);
-}
-
-void EncodedProgram::AddRel32(int label_index) {
- ops_.push_back(REL32);
- rel32_ix_.push_back(label_index);
-}
-
-void EncodedProgram::AddMakeRelocs() {
- ops_.push_back(MAKE_BASE_RELOCATION_TABLE);
-}
-
-void EncodedProgram::DebuggingSummary() {
- LOG(INFO) << "EncodedProgram Summary";
- LOG(INFO) << " image base " << image_base_;
- LOG(INFO) << " abs32 rvas " << abs32_rva_.size();
- LOG(INFO) << " rel32 rvas " << rel32_rva_.size();
- LOG(INFO) << " ops " << ops_.size();
- LOG(INFO) << " origins " << origins_.size();
- LOG(INFO) << " copy_counts " << copy_counts_.size();
- LOG(INFO) << " copy_bytes " << copy_bytes_.size();
- LOG(INFO) << " abs32_ix " << abs32_ix_.size();
- LOG(INFO) << " rel32_ix " << rel32_ix_.size();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// For algorithm refinement purposes it is useful to write subsets of the file
-// format. This gives us the ability to estimate the entropy of the
-// differential compression of the individual streams, which can provide
-// invaluable insights. The default, of course, is to include all the streams.
-//
-enum FieldSelect {
- INCLUDE_ABS32_ADDRESSES = 0x0001,
- INCLUDE_REL32_ADDRESSES = 0x0002,
- INCLUDE_ABS32_INDEXES = 0x0010,
- INCLUDE_REL32_INDEXES = 0x0020,
- INCLUDE_OPS = 0x0100,
- INCLUDE_BYTES = 0x0200,
- INCLUDE_COPY_COUNTS = 0x0400,
- INCLUDE_MISC = 0x1000
-};
-
-static FieldSelect GetFieldSelect() {
-#if 1
- // TODO(sra): Use better configuration.
- std::wstring s = base::SysInfo::GetEnvVar(L"A_FIELDS");
- if (!s.empty()) {
- return static_cast<FieldSelect>(wcstoul(s.c_str(), 0, 0));
- }
-#endif
- return static_cast<FieldSelect>(~0);
-}
-
-void EncodedProgram::WriteTo(SinkStreamSet* streams) {
- FieldSelect select = GetFieldSelect();
-
- // The order of fields must be consistent in WriteTo and ReadFrom, regardless
- // of the streams used. The code can be configured with all kStreamXXX
- // constants the same.
- //
- // If we change the code to pipeline reading with assembly (to avoid temporary
- // storage vectors by consuming operands directly from the stream) then we
- // need to read the base address and the random access address tables first,
- // the rest can be interleaved.
-
- if (select & INCLUDE_MISC) {
- // TODO(sra): write 64 bits.
- streams->stream(kStreamMisc)->WriteVarint32(
- static_cast<uint32>(image_base_));
- }
-
- if (select & INCLUDE_ABS32_ADDRESSES)
- WriteU32Delta(abs32_rva_, streams->stream(kStreamAbs32Addresses));
- if (select & INCLUDE_REL32_ADDRESSES)
- WriteU32Delta(rel32_rva_, streams->stream(kStreamRel32Addresses));
- if (select & INCLUDE_MISC)
- WriteVector(origins_, streams->stream(kStreamOriginAddresses));
- if (select & INCLUDE_OPS) {
- streams->stream(kStreamOps)->Reserve(ops_.size() + 5); // 5 for length.
- WriteVector(ops_, streams->stream(kStreamOps));
- }
- if (select & INCLUDE_COPY_COUNTS)
- WriteVector(copy_counts_, streams->stream(kStreamCopyCounts));
- if (select & INCLUDE_BYTES)
- WriteVectorU8(copy_bytes_, streams->stream(kStreamBytes));
- if (select & INCLUDE_ABS32_INDEXES)
- WriteVector(abs32_ix_, streams->stream(kStreamAbs32Indexes));
- if (select & INCLUDE_REL32_INDEXES)
- WriteVector(rel32_ix_, streams->stream(kStreamRel32Indexes));
-}
-
-bool EncodedProgram::ReadFrom(SourceStreamSet* streams) {
- // TODO(sra): read 64 bits.
- uint32 temp;
- if (!streams->stream(kStreamMisc)->ReadVarint32(&temp))
- return false;
- image_base_ = temp;
-
- if (!ReadU32Delta(&abs32_rva_, streams->stream(kStreamAbs32Addresses)))
- return false;
- if (!ReadU32Delta(&rel32_rva_, streams->stream(kStreamRel32Addresses)))
- return false;
- if (!ReadVector(&origins_, streams->stream(kStreamOriginAddresses)))
- return false;
- if (!ReadVector(&ops_, streams->stream(kStreamOps)))
- return false;
- if (!ReadVector(&copy_counts_, streams->stream(kStreamCopyCounts)))
- return false;
- if (!ReadVectorU8(&copy_bytes_, streams->stream(kStreamBytes)))
- return false;
- if (!ReadVector(&abs32_ix_, streams->stream(kStreamAbs32Indexes)))
- return false;
- if (!ReadVector(&rel32_ix_, streams->stream(kStreamRel32Indexes)))
- return false;
-
- // Check that streams have been completely consumed.
- for (int i = 0; i < kStreamLimit; ++i) {
- if (streams->stream(i)->Remaining() > 0)
- return false;
- }
-
- return true;
-}
-
-// Safe, non-throwing version of std::vector::at(). Returns 'true' for success,
-// 'false' for out-of-bounds index error.
-template<typename T>
-bool VectorAt(const std::vector<T>& v, size_t index, T* output) {
- if (index >= v.size())
- return false;
- *output = v[index];
- return true;
-}
-
-bool EncodedProgram::AssembleTo(SinkStream* final_buffer) {
- // For the most part, the assembly process walks the various tables.
- // ix_mumble is the index into the mumble table.
- size_t ix_origins = 0;
- size_t ix_copy_counts = 0;
- size_t ix_copy_bytes = 0;
- size_t ix_abs32_ix = 0;
- size_t ix_rel32_ix = 0;
-
- RVA current_rva = 0;
-
- bool pending_base_relocation_table = false;
- SinkStream bytes_following_base_relocation_table;
-
- SinkStream* output = final_buffer;
-
- for (size_t ix_ops = 0; ix_ops < ops_.size(); ++ix_ops) {
- OP op = ops_[ix_ops];
-
- switch (op) {
- default:
- return false;
-
- case ORIGIN: {
- RVA section_rva;
- if (!VectorAt(origins_, ix_origins, &section_rva))
- return false;
- ++ix_origins;
- current_rva = section_rva;
- break;
- }
-
- case COPY: {
- int count;
- if (!VectorAt(copy_counts_, ix_copy_counts, &count))
- return false;
- ++ix_copy_counts;
- for (int i = 0; i < count; ++i) {
- uint8 b;
- if (!VectorAt(copy_bytes_, ix_copy_bytes, &b))
- return false;
- ++ix_copy_bytes;
- output->Write(&b, 1);
- }
- current_rva += count;
- break;
- }
-
- case COPY1: {
- uint8 b;
- if (!VectorAt(copy_bytes_, ix_copy_bytes, &b))
- return false;
- ++ix_copy_bytes;
- output->Write(&b, 1);
- current_rva += 1;
- break;
- }
-
- case REL32: {
- uint32 index;
- if (!VectorAt(rel32_ix_, ix_rel32_ix, &index))
- return false;
- ++ix_rel32_ix;
- RVA rva;
- if (!VectorAt(rel32_rva_, index, &rva))
- return false;
- uint32 offset = (rva - (current_rva + 4));
- output->Write(&offset, 4);
- current_rva += 4;
- break;
- }
-
- case ABS32: {
- uint32 index;
- if (!VectorAt(abs32_ix_, ix_abs32_ix, &index))
- return false;
- ++ix_abs32_ix;
- RVA rva;
- if (!VectorAt(abs32_rva_, index, &rva))
- return false;
- uint32 abs32 = static_cast<uint32>(rva + image_base_);
- abs32_relocs_.push_back(current_rva);
- output->Write(&abs32, 4);
- current_rva += 4;
- break;
- }
-
- case MAKE_BASE_RELOCATION_TABLE: {
- // We can see the base relocation anywhere, but we only have the
- // information to generate it at the very end. So we divert the bytes
- // we are generating to a temporary stream.
- if (pending_base_relocation_table) // Can't have two base relocation
- // tables.
- return false;
-
- pending_base_relocation_table = true;
- output = &bytes_following_base_relocation_table;
- break;
- // There is a potential problem *if* the instruction stream contains
- // some REL32 relocations following the base relocation and in the same
- // section. We don't know the size of the table, so 'current_rva' will
- // be wrong, causing REL32 offsets to be miscalculated. This never
- // happens; the base relocation table is usually in a section of its
- // own, a data-only section, and following everything else in the
- // executable except some padding zero bytes. We could fix this by
- // emitting an ORIGIN after the MAKE_BASE_RELOCATION_TABLE.
- }
- }
- }
-
- if (pending_base_relocation_table) {
- GenerateBaseRelocations(final_buffer);
- final_buffer->Append(&bytes_following_base_relocation_table);
- }
-
- // Final verification check: did we consume all lists?
- if (ix_copy_counts != copy_counts_.size())
- return false;
- if (ix_copy_bytes != copy_bytes_.size())
- return false;
- if (ix_abs32_ix != abs32_ix_.size())
- return false;
- if (ix_rel32_ix != rel32_ix_.size())
- return false;
-
- return true;
-}
-
-
-// RelocBlock has the layout of a block of relocations in the base relocation
-// table file format.
-//
-class RelocBlock {
- public:
- uint32 page_rva;
- uint32 block_size;
- uint16 relocs[4096]; // Allow up to one relocation per byte of a 4k page.
-
- RelocBlock() : page_rva(~0), block_size(8) {}
-
- void Add(uint16 item) {
- relocs[(block_size-8)/2] = item;
- block_size += 2;
- }
-
- void Flush(SinkStream* buffer) {
- if (block_size != 8) {
- if (block_size % 4 != 0) { // Pad to make size multiple of 4 bytes.
- Add(0);
- }
- buffer->Write(this, block_size);
- block_size = 8;
- }
- }
-};
-
-COMPILE_ASSERT(offsetof(RelocBlock, relocs) == 8, reloc_block_header_size);
-
-void EncodedProgram::GenerateBaseRelocations(SinkStream* buffer) {
- std::sort(abs32_relocs_.begin(), abs32_relocs_.end());
-
- RelocBlock block;
-
- for (size_t i = 0; i < abs32_relocs_.size(); ++i) {
- uint32 rva = abs32_relocs_[i];
- uint32 page_rva = rva & ~0xFFF;
- if (page_rva != block.page_rva) {
- block.Flush(buffer);
- block.page_rva = page_rva;
- }
- block.Add(0x3000 | (rva & 0xFFF));
- }
- block.Flush(buffer);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-Status WriteEncodedProgram(EncodedProgram* encoded, SinkStreamSet* sink) {
- encoded->WriteTo(sink);
- return C_OK;
-}
-
-Status ReadEncodedProgram(SourceStreamSet* streams, EncodedProgram** output) {
- EncodedProgram* encoded = new EncodedProgram();
- if (encoded->ReadFrom(streams)) {
- *output = encoded;
- return C_OK;
- }
- delete encoded;
- return C_DESERIALIZATION_FAILED;
-}
-
-Status Assemble(EncodedProgram* encoded, SinkStream* buffer) {
- bool assembled = encoded->AssembleTo(buffer);
- if (assembled)
- return C_OK;
- return C_ASSEMBLY_FAILED;
-}
-
-void DeleteEncodedProgram(EncodedProgram* encoded) {
- delete encoded;
-}
-
-} // end namespace
« no previous file with comments | « third_party/courgette/encoded_program.h ('k') | third_party/courgette/encoded_program_fuzz_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698