| Index: courgette/assembly_program.cc
|
| ===================================================================
|
| --- courgette/assembly_program.cc (revision 0)
|
| +++ courgette/assembly_program.cc (revision 0)
|
| @@ -0,0 +1,372 @@
|
| +// Copyright (c) 2009 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "courgette/assembly_program.h"
|
| +
|
| +#include <memory.h>
|
| +#include <algorithm>
|
| +#include <map>
|
| +#include <set>
|
| +#include <sstream>
|
| +#include <vector>
|
| +
|
| +#include "base/logging.h"
|
| +
|
| +#include "courgette/courgette.h"
|
| +#include "courgette/encoded_program.h"
|
| +
|
| +namespace courgette {
|
| +
|
| +// Opcodes of simple assembly language
|
| +enum OP {
|
| + ORIGIN, // ORIGIN <rva> - set current address for assembly.
|
| + MAKERELOCS, // Generates a base relocation table.
|
| + DEFBYTE, // DEFBYTE <value> - emit a byte literal.
|
| + REL32, // REL32 <label> - emit a rel32 encoded reference to 'label'.
|
| + ABS32, // REL32 <label> - emit am abs32 encoded reference to 'label'.
|
| + LAST_OP
|
| +};
|
| +
|
| +// Base class for instructions. Because we have so many instructions we want to
|
| +// keep them as small as possible. For this reason we avoid virtual functions.
|
| +//
|
| +class Instruction {
|
| + public:
|
| + OP op() const { return static_cast<OP>(op_); }
|
| +
|
| + protected:
|
| + explicit Instruction(OP op) : op_(op), info_(0) {}
|
| + Instruction(OP op, unsigned int info) : op_(op), info_(info) {}
|
| +
|
| + uint32 op_ : 4; // A few bits to store the OP code.
|
| + uint32 info_ : 28; // Remaining bits in first word available to subclass.
|
| +
|
| + private:
|
| + DISALLOW_COPY_AND_ASSIGN(Instruction);
|
| +};
|
| +
|
| +namespace {
|
| +
|
| +// Sets the current address for the emitting instructions.
|
| +class OriginInstruction : public Instruction {
|
| + public:
|
| + explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
|
| + RVA origin_rva() const { return rva_; }
|
| + private:
|
| + RVA rva_;
|
| +};
|
| +
|
| +// Emits an entire base relocation table.
|
| +class MakeRelocsInstruction : public Instruction {
|
| + public:
|
| + MakeRelocsInstruction() : Instruction(MAKERELOCS) {}
|
| +};
|
| +
|
| +// Emits a single byte.
|
| +class ByteInstruction : public Instruction {
|
| + public:
|
| + explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {}
|
| + uint8 byte_value() const { return info_; }
|
| +};
|
| +
|
| +// A ABS32 to REL32 instruction emits a reference to a label's address.
|
| +class InstructionWithLabel : public Instruction {
|
| + public:
|
| + InstructionWithLabel(OP op, Label* label)
|
| + : Instruction(op, 0), label_(label) {
|
| + if (label == NULL) NOTREACHED();
|
| + }
|
| + Label* label() const { return label_; }
|
| + private:
|
| + Label* label_;
|
| +};
|
| +
|
| +} // namespace
|
| +
|
| +AssemblyProgram::AssemblyProgram()
|
| + : image_base_(0),
|
| + byte_instruction_cache_(NULL) {
|
| +}
|
| +
|
| +static void DeleteContainedLabels(const RVAToLabel& labels) {
|
| + for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p)
|
| + delete p->second;
|
| +}
|
| +
|
| +AssemblyProgram::~AssemblyProgram() {
|
| + for (size_t i = 0; i < instructions_.size(); ++i) {
|
| + Instruction* instruction = instructions_[i];
|
| + if (instruction->op() != DEFBYTE) // Will be in byte_instruction_cache_.
|
| + delete instruction;
|
| + }
|
| + if (byte_instruction_cache_) {
|
| + for (size_t i = 0; i < 256; ++i)
|
| + delete byte_instruction_cache_[i];
|
| + delete[] byte_instruction_cache_;
|
| + }
|
| + DeleteContainedLabels(rel32_labels_);
|
| + DeleteContainedLabels(abs32_labels_);
|
| +}
|
| +
|
| +void AssemblyProgram::EmitMakeRelocsInstruction() {
|
| + Emit(new MakeRelocsInstruction());
|
| +}
|
| +
|
| +void AssemblyProgram::EmitOriginInstruction(RVA rva) {
|
| + Emit(new OriginInstruction(rva));
|
| +}
|
| +
|
| +void AssemblyProgram::EmitByteInstruction(uint8 byte) {
|
| + Emit(GetByteInstruction(byte));
|
| +}
|
| +
|
| +void AssemblyProgram::EmitRel32(Label* label) {
|
| + Emit(new InstructionWithLabel(REL32, label));
|
| +}
|
| +
|
| +void AssemblyProgram::EmitAbs32(Label* label) {
|
| + Emit(new InstructionWithLabel(ABS32, label));
|
| +}
|
| +
|
| +Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) {
|
| + return FindLabel(rva, &abs32_labels_);
|
| +}
|
| +
|
| +Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) {
|
| + return FindLabel(rva, &rel32_labels_);
|
| +}
|
| +
|
| +void AssemblyProgram::DefaultAssignIndexes() {
|
| + DefaultAssignIndexes(&abs32_labels_);
|
| + DefaultAssignIndexes(&rel32_labels_);
|
| +}
|
| +
|
| +void AssemblyProgram::UnassignIndexes() {
|
| + UnassignIndexes(&abs32_labels_);
|
| + UnassignIndexes(&rel32_labels_);
|
| +}
|
| +
|
| +void AssemblyProgram::AssignRemainingIndexes() {
|
| + AssignRemainingIndexes(&abs32_labels_);
|
| + AssignRemainingIndexes(&rel32_labels_);
|
| +}
|
| +
|
| +Label* AssemblyProgram::InstructionAbs32Label(
|
| + const Instruction* instruction) const {
|
| + if (instruction->op() == ABS32)
|
| + return static_cast<const InstructionWithLabel*>(instruction)->label();
|
| + return NULL;
|
| +}
|
| +
|
| +Label* AssemblyProgram::InstructionRel32Label(
|
| + const Instruction* instruction) const {
|
| + if (instruction->op() == REL32)
|
| + return static_cast<const InstructionWithLabel*>(instruction)->label();
|
| + return NULL;
|
| +}
|
| +
|
| +Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
|
| + Label*& slot = (*labels)[rva];
|
| + if (slot == 0) {
|
| + slot = new Label(rva);
|
| + }
|
| + return slot;
|
| +}
|
| +
|
| +void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
|
| + for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
|
| + Label* current = p->second;
|
| + current->index_ = Label::kNoIndex;
|
| + }
|
| +}
|
| +
|
| +// DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
|
| +// address order.
|
| +//
|
| +void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
|
| + int index = 0;
|
| + for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
|
| + Label* current = p->second;
|
| + if (current->index_ != Label::kNoIndex)
|
| + NOTREACHED();
|
| + current->index_ = index;
|
| + ++index;
|
| + }
|
| +}
|
| +
|
| +// AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
|
| +// yet assigned an index.
|
| +//
|
| +void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
|
| + // An address table compresses best when each index is associated with an
|
| + // address that is slight larger than the previous index.
|
| +
|
| + // First see which indexes have not been used. The 'available' vector could
|
| + // grow even bigger, but the number of addresses is a better starting size
|
| + // than empty.
|
| + std::vector<bool> available(labels->size(), true);
|
| + int used = 0;
|
| +
|
| + for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
|
| + size_t index = p->second->index_;
|
| + if (index != Label::kNoIndex) {
|
| + while (index >= available.size())
|
| + available.push_back(true);
|
| + available.at(index) = false;
|
| + ++used;
|
| + }
|
| + }
|
| +
|
| + LOG(INFO) << used << " of " << labels->size() << " labels pre-assigned";
|
| +
|
| + // Are there any unused labels that happen to be adjacent following a used
|
| + // label?
|
| + //
|
| + int fill_forward_count = 0;
|
| + Label* prev = 0;
|
| + for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
|
| + Label* current = p->second;
|
| + if (current->index_ == Label::kNoIndex) {
|
| + size_t index = 0;
|
| + if (prev && prev->index_ != Label::kNoIndex)
|
| + index = prev->index_ + 1;
|
| + if (index < available.size() && available.at(index)) {
|
| + current->index_ = index;
|
| + available.at(index) = false;
|
| + ++fill_forward_count;
|
| + }
|
| + }
|
| + prev = current;
|
| + }
|
| +
|
| + // Are there any unused labels that happen to be adjacent preceeding a used
|
| + // label?
|
| + //
|
| + int fill_backward_count = 0;
|
| + int backward_refs = 0;
|
| + prev = 0;
|
| + for (RVAToLabel::reverse_iterator p = labels->rbegin();
|
| + p != labels->rend();
|
| + ++p) {
|
| + Label* current = p->second;
|
| + if (current->index_ == Label::kNoIndex) {
|
| + int prev_index;
|
| + if (prev)
|
| + prev_index = prev->index_;
|
| + else
|
| + prev_index = available.size();
|
| + if (prev_index != 0 &&
|
| + prev_index != Label::kNoIndex &&
|
| + available.at(prev_index - 1)) {
|
| + current->index_ = prev_index - 1;
|
| + available.at(current->index_) = false;
|
| + ++fill_backward_count;
|
| + }
|
| + }
|
| + prev = current;
|
| + }
|
| +
|
| + // Fill in any remaining indexes
|
| + int fill_infill_count = 0;
|
| + int index = 0;
|
| + for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
|
| + Label* current = p->second;
|
| + if (current->index_ == Label::kNoIndex) {
|
| + while (!available.at(index)) {
|
| + ++index;
|
| + }
|
| + current->index_ = index;
|
| + available.at(index) = false;
|
| + ++index;
|
| + ++fill_infill_count;
|
| + }
|
| + }
|
| +
|
| + LOG(INFO) << " fill"
|
| + << " forward " << fill_forward_count << " "
|
| + << " backward " << fill_backward_count << " "
|
| + << " infill " << fill_infill_count;
|
| +}
|
| +
|
| +typedef void (EncodedProgram::*DefineLabelMethod)(int index, RVA value);
|
| +
|
| +static void DefineLabels(const RVAToLabel& labels,
|
| + EncodedProgram* encoded_format,
|
| + DefineLabelMethod define_label) {
|
| + for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p) {
|
| + Label* label = p->second;
|
| + (encoded_format->*define_label)(label->index_, label->rva_);
|
| + }
|
| +}
|
| +
|
| +EncodedProgram* AssemblyProgram::Encode() const {
|
| + EncodedProgram* encoded = new EncodedProgram();
|
| +
|
| + encoded->set_image_base(image_base_);
|
| + DefineLabels(abs32_labels_, encoded, &EncodedProgram::DefineAbs32Label);
|
| + DefineLabels(rel32_labels_, encoded, &EncodedProgram::DefineRel32Label);
|
| + encoded->EndLabels();
|
| +
|
| + for (size_t i = 0; i < instructions_.size(); ++i) {
|
| + Instruction* instruction = instructions_[i];
|
| +
|
| + switch (instruction->op()) {
|
| + case ORIGIN: {
|
| + OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
|
| + encoded->AddOrigin(org->origin_rva());
|
| + break;
|
| + }
|
| + case DEFBYTE: {
|
| + uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value();
|
| + encoded->AddCopy(1, &b);
|
| + break;
|
| + }
|
| + case REL32: {
|
| + Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
|
| + encoded->AddRel32(label->index_);
|
| + break;
|
| + }
|
| + case ABS32: {
|
| + Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
|
| + encoded->AddAbs32(label->index_);
|
| + break;
|
| + }
|
| + case MAKERELOCS: {
|
| + encoded->AddMakeRelocs();
|
| + break;
|
| + }
|
| + default: {
|
| + NOTREACHED() << "Unknown Insn OP kind";
|
| + }
|
| + }
|
| + }
|
| +
|
| + return encoded;
|
| +}
|
| +
|
| +Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
|
| + if (!byte_instruction_cache_) {
|
| + byte_instruction_cache_ = new Instruction*[256];
|
| + for (int i = 0; i < 256; ++i) {
|
| + byte_instruction_cache_[i] = new ByteInstruction(static_cast<uint8>(i));
|
| + }
|
| + }
|
| +
|
| + return byte_instruction_cache_[byte];
|
| +}
|
| +
|
| +////////////////////////////////////////////////////////////////////////////////
|
| +
|
| +Status Encode(AssemblyProgram* program, EncodedProgram** output) {
|
| + *output = NULL;
|
| + EncodedProgram *encoded = program->Encode();
|
| + if (encoded) {
|
| + *output = encoded;
|
| + return C_OK;
|
| + } else {
|
| + return C_GENERAL_ERROR;
|
| + }
|
| +}
|
| +
|
| +} // namespace courgette
|
|
|