OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "courgette/assembly_program.h" |
| 6 |
| 7 #include <memory.h> |
| 8 #include <algorithm> |
| 9 #include <map> |
| 10 #include <set> |
| 11 #include <sstream> |
| 12 #include <vector> |
| 13 |
| 14 #include "base/logging.h" |
| 15 |
| 16 #include "courgette/courgette.h" |
| 17 #include "courgette/encoded_program.h" |
| 18 |
| 19 namespace courgette { |
| 20 |
| 21 // Opcodes of simple assembly language |
| 22 enum OP { |
| 23 ORIGIN, // ORIGIN <rva> - set current address for assembly. |
| 24 MAKERELOCS, // Generates a base relocation table. |
| 25 DEFBYTE, // DEFBYTE <value> - emit a byte literal. |
| 26 REL32, // REL32 <label> - emit a rel32 encoded reference to 'label'. |
| 27 ABS32, // REL32 <label> - emit am abs32 encoded reference to 'label'. |
| 28 LAST_OP |
| 29 }; |
| 30 |
| 31 // Base class for instructions. Because we have so many instructions we want to |
| 32 // keep them as small as possible. For this reason we avoid virtual functions. |
| 33 // |
| 34 class Instruction { |
| 35 public: |
| 36 OP op() const { return static_cast<OP>(op_); } |
| 37 |
| 38 protected: |
| 39 explicit Instruction(OP op) : op_(op), info_(0) {} |
| 40 Instruction(OP op, unsigned int info) : op_(op), info_(info) {} |
| 41 |
| 42 uint32 op_ : 4; // A few bits to store the OP code. |
| 43 uint32 info_ : 28; // Remaining bits in first word available to subclass. |
| 44 |
| 45 private: |
| 46 DISALLOW_COPY_AND_ASSIGN(Instruction); |
| 47 }; |
| 48 |
| 49 namespace { |
| 50 |
| 51 // Sets the current address for the emitting instructions. |
| 52 class OriginInstruction : public Instruction { |
| 53 public: |
| 54 explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {} |
| 55 RVA origin_rva() const { return rva_; } |
| 56 private: |
| 57 RVA rva_; |
| 58 }; |
| 59 |
| 60 // Emits an entire base relocation table. |
| 61 class MakeRelocsInstruction : public Instruction { |
| 62 public: |
| 63 MakeRelocsInstruction() : Instruction(MAKERELOCS) {} |
| 64 }; |
| 65 |
| 66 // Emits a single byte. |
| 67 class ByteInstruction : public Instruction { |
| 68 public: |
| 69 explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {} |
| 70 uint8 byte_value() const { return info_; } |
| 71 }; |
| 72 |
| 73 // A ABS32 to REL32 instruction emits a reference to a label's address. |
| 74 class InstructionWithLabel : public Instruction { |
| 75 public: |
| 76 InstructionWithLabel(OP op, Label* label) |
| 77 : Instruction(op, 0), label_(label) { |
| 78 if (label == NULL) NOTREACHED(); |
| 79 } |
| 80 Label* label() const { return label_; } |
| 81 private: |
| 82 Label* label_; |
| 83 }; |
| 84 |
| 85 } // namespace |
| 86 |
| 87 AssemblyProgram::AssemblyProgram() |
| 88 : image_base_(0), |
| 89 byte_instruction_cache_(NULL) { |
| 90 } |
| 91 |
| 92 static void DeleteContainedLabels(const RVAToLabel& labels) { |
| 93 for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p) |
| 94 delete p->second; |
| 95 } |
| 96 |
| 97 AssemblyProgram::~AssemblyProgram() { |
| 98 for (size_t i = 0; i < instructions_.size(); ++i) { |
| 99 Instruction* instruction = instructions_[i]; |
| 100 if (instruction->op() != DEFBYTE) // Will be in byte_instruction_cache_. |
| 101 delete instruction; |
| 102 } |
| 103 if (byte_instruction_cache_) { |
| 104 for (size_t i = 0; i < 256; ++i) |
| 105 delete byte_instruction_cache_[i]; |
| 106 delete[] byte_instruction_cache_; |
| 107 } |
| 108 DeleteContainedLabels(rel32_labels_); |
| 109 DeleteContainedLabels(abs32_labels_); |
| 110 } |
| 111 |
| 112 void AssemblyProgram::EmitMakeRelocsInstruction() { |
| 113 Emit(new MakeRelocsInstruction()); |
| 114 } |
| 115 |
| 116 void AssemblyProgram::EmitOriginInstruction(RVA rva) { |
| 117 Emit(new OriginInstruction(rva)); |
| 118 } |
| 119 |
| 120 void AssemblyProgram::EmitByteInstruction(uint8 byte) { |
| 121 Emit(GetByteInstruction(byte)); |
| 122 } |
| 123 |
| 124 void AssemblyProgram::EmitRel32(Label* label) { |
| 125 Emit(new InstructionWithLabel(REL32, label)); |
| 126 } |
| 127 |
| 128 void AssemblyProgram::EmitAbs32(Label* label) { |
| 129 Emit(new InstructionWithLabel(ABS32, label)); |
| 130 } |
| 131 |
| 132 Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) { |
| 133 return FindLabel(rva, &abs32_labels_); |
| 134 } |
| 135 |
| 136 Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) { |
| 137 return FindLabel(rva, &rel32_labels_); |
| 138 } |
| 139 |
| 140 void AssemblyProgram::DefaultAssignIndexes() { |
| 141 DefaultAssignIndexes(&abs32_labels_); |
| 142 DefaultAssignIndexes(&rel32_labels_); |
| 143 } |
| 144 |
| 145 void AssemblyProgram::UnassignIndexes() { |
| 146 UnassignIndexes(&abs32_labels_); |
| 147 UnassignIndexes(&rel32_labels_); |
| 148 } |
| 149 |
| 150 void AssemblyProgram::AssignRemainingIndexes() { |
| 151 AssignRemainingIndexes(&abs32_labels_); |
| 152 AssignRemainingIndexes(&rel32_labels_); |
| 153 } |
| 154 |
| 155 Label* AssemblyProgram::InstructionAbs32Label( |
| 156 const Instruction* instruction) const { |
| 157 if (instruction->op() == ABS32) |
| 158 return static_cast<const InstructionWithLabel*>(instruction)->label(); |
| 159 return NULL; |
| 160 } |
| 161 |
| 162 Label* AssemblyProgram::InstructionRel32Label( |
| 163 const Instruction* instruction) const { |
| 164 if (instruction->op() == REL32) |
| 165 return static_cast<const InstructionWithLabel*>(instruction)->label(); |
| 166 return NULL; |
| 167 } |
| 168 |
| 169 Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) { |
| 170 Label*& slot = (*labels)[rva]; |
| 171 if (slot == 0) { |
| 172 slot = new Label(rva); |
| 173 } |
| 174 return slot; |
| 175 } |
| 176 |
| 177 void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) { |
| 178 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { |
| 179 Label* current = p->second; |
| 180 current->index_ = Label::kNoIndex; |
| 181 } |
| 182 } |
| 183 |
| 184 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing |
| 185 // address order. |
| 186 // |
| 187 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) { |
| 188 int index = 0; |
| 189 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { |
| 190 Label* current = p->second; |
| 191 if (current->index_ != Label::kNoIndex) |
| 192 NOTREACHED(); |
| 193 current->index_ = index; |
| 194 ++index; |
| 195 } |
| 196 } |
| 197 |
| 198 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not |
| 199 // yet assigned an index. |
| 200 // |
| 201 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) { |
| 202 // An address table compresses best when each index is associated with an |
| 203 // address that is slight larger than the previous index. |
| 204 |
| 205 // First see which indexes have not been used. The 'available' vector could |
| 206 // grow even bigger, but the number of addresses is a better starting size |
| 207 // than empty. |
| 208 std::vector<bool> available(labels->size(), true); |
| 209 int used = 0; |
| 210 |
| 211 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { |
| 212 size_t index = p->second->index_; |
| 213 if (index != Label::kNoIndex) { |
| 214 while (index >= available.size()) |
| 215 available.push_back(true); |
| 216 available.at(index) = false; |
| 217 ++used; |
| 218 } |
| 219 } |
| 220 |
| 221 LOG(INFO) << used << " of " << labels->size() << " labels pre-assigned"; |
| 222 |
| 223 // Are there any unused labels that happen to be adjacent following a used |
| 224 // label? |
| 225 // |
| 226 int fill_forward_count = 0; |
| 227 Label* prev = 0; |
| 228 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { |
| 229 Label* current = p->second; |
| 230 if (current->index_ == Label::kNoIndex) { |
| 231 size_t index = 0; |
| 232 if (prev && prev->index_ != Label::kNoIndex) |
| 233 index = prev->index_ + 1; |
| 234 if (index < available.size() && available.at(index)) { |
| 235 current->index_ = index; |
| 236 available.at(index) = false; |
| 237 ++fill_forward_count; |
| 238 } |
| 239 } |
| 240 prev = current; |
| 241 } |
| 242 |
| 243 // Are there any unused labels that happen to be adjacent preceeding a used |
| 244 // label? |
| 245 // |
| 246 int fill_backward_count = 0; |
| 247 int backward_refs = 0; |
| 248 prev = 0; |
| 249 for (RVAToLabel::reverse_iterator p = labels->rbegin(); |
| 250 p != labels->rend(); |
| 251 ++p) { |
| 252 Label* current = p->second; |
| 253 if (current->index_ == Label::kNoIndex) { |
| 254 int prev_index; |
| 255 if (prev) |
| 256 prev_index = prev->index_; |
| 257 else |
| 258 prev_index = available.size(); |
| 259 if (prev_index != 0 && |
| 260 prev_index != Label::kNoIndex && |
| 261 available.at(prev_index - 1)) { |
| 262 current->index_ = prev_index - 1; |
| 263 available.at(current->index_) = false; |
| 264 ++fill_backward_count; |
| 265 } |
| 266 } |
| 267 prev = current; |
| 268 } |
| 269 |
| 270 // Fill in any remaining indexes |
| 271 int fill_infill_count = 0; |
| 272 int index = 0; |
| 273 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { |
| 274 Label* current = p->second; |
| 275 if (current->index_ == Label::kNoIndex) { |
| 276 while (!available.at(index)) { |
| 277 ++index; |
| 278 } |
| 279 current->index_ = index; |
| 280 available.at(index) = false; |
| 281 ++index; |
| 282 ++fill_infill_count; |
| 283 } |
| 284 } |
| 285 |
| 286 LOG(INFO) << " fill" |
| 287 << " forward " << fill_forward_count << " " |
| 288 << " backward " << fill_backward_count << " " |
| 289 << " infill " << fill_infill_count; |
| 290 } |
| 291 |
| 292 typedef void (EncodedProgram::*DefineLabelMethod)(int index, RVA value); |
| 293 |
| 294 static void DefineLabels(const RVAToLabel& labels, |
| 295 EncodedProgram* encoded_format, |
| 296 DefineLabelMethod define_label) { |
| 297 for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p) { |
| 298 Label* label = p->second; |
| 299 (encoded_format->*define_label)(label->index_, label->rva_); |
| 300 } |
| 301 } |
| 302 |
| 303 EncodedProgram* AssemblyProgram::Encode() const { |
| 304 EncodedProgram* encoded = new EncodedProgram(); |
| 305 |
| 306 encoded->set_image_base(image_base_); |
| 307 DefineLabels(abs32_labels_, encoded, &EncodedProgram::DefineAbs32Label); |
| 308 DefineLabels(rel32_labels_, encoded, &EncodedProgram::DefineRel32Label); |
| 309 encoded->EndLabels(); |
| 310 |
| 311 for (size_t i = 0; i < instructions_.size(); ++i) { |
| 312 Instruction* instruction = instructions_[i]; |
| 313 |
| 314 switch (instruction->op()) { |
| 315 case ORIGIN: { |
| 316 OriginInstruction* org = static_cast<OriginInstruction*>(instruction); |
| 317 encoded->AddOrigin(org->origin_rva()); |
| 318 break; |
| 319 } |
| 320 case DEFBYTE: { |
| 321 uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value(); |
| 322 encoded->AddCopy(1, &b); |
| 323 break; |
| 324 } |
| 325 case REL32: { |
| 326 Label* label = static_cast<InstructionWithLabel*>(instruction)->label(); |
| 327 encoded->AddRel32(label->index_); |
| 328 break; |
| 329 } |
| 330 case ABS32: { |
| 331 Label* label = static_cast<InstructionWithLabel*>(instruction)->label(); |
| 332 encoded->AddAbs32(label->index_); |
| 333 break; |
| 334 } |
| 335 case MAKERELOCS: { |
| 336 encoded->AddMakeRelocs(); |
| 337 break; |
| 338 } |
| 339 default: { |
| 340 NOTREACHED() << "Unknown Insn OP kind"; |
| 341 } |
| 342 } |
| 343 } |
| 344 |
| 345 return encoded; |
| 346 } |
| 347 |
| 348 Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) { |
| 349 if (!byte_instruction_cache_) { |
| 350 byte_instruction_cache_ = new Instruction*[256]; |
| 351 for (int i = 0; i < 256; ++i) { |
| 352 byte_instruction_cache_[i] = new ByteInstruction(static_cast<uint8>(i)); |
| 353 } |
| 354 } |
| 355 |
| 356 return byte_instruction_cache_[byte]; |
| 357 } |
| 358 |
| 359 //////////////////////////////////////////////////////////////////////////////// |
| 360 |
| 361 Status Encode(AssemblyProgram* program, EncodedProgram** output) { |
| 362 *output = NULL; |
| 363 EncodedProgram *encoded = program->Encode(); |
| 364 if (encoded) { |
| 365 *output = encoded; |
| 366 return C_OK; |
| 367 } else { |
| 368 return C_GENERAL_ERROR; |
| 369 } |
| 370 } |
| 371 |
| 372 } // namespace courgette |
OLD | NEW |