| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "third_party/courgette/assembly_program.h" | |
| 6 | |
| 7 #include <memory.h> | |
| 8 #include <algorithm> | |
| 9 #include <map> | |
| 10 #include <set> | |
| 11 #include <sstream> | |
| 12 #include <vector> | |
| 13 | |
| 14 #include "base/logging.h" | |
| 15 | |
| 16 #include "third_party/courgette/courgette.h" | |
| 17 #include "third_party/courgette/encoded_program.h" | |
| 18 | |
| 19 namespace courgette { | |
| 20 | |
| 21 // Opcodes of simple assembly language | |
| 22 enum OP { | |
| 23 ORIGIN, // ORIGIN <rva> - set current address for assembly. | |
| 24 MAKERELOCS, // Generates a base relocation table. | |
| 25 DEFBYTE, // DEFBYTE <value> - emit a byte literal. | |
| 26 REL32, // REL32 <label> - emit a rel32 encoded reference to 'label'. | |
| 27 ABS32, // REL32 <label> - emit am abs32 encoded reference to 'label'. | |
| 28 LAST_OP | |
| 29 }; | |
| 30 | |
| 31 // Base class for instructions. Because we have so many instructions we want to | |
| 32 // keep them as small as possible. For this reason we avoid virtual functions. | |
| 33 // | |
| 34 class Instruction { | |
| 35 public: | |
| 36 OP op() const { return static_cast<OP>(op_); } | |
| 37 | |
| 38 protected: | |
| 39 explicit Instruction(OP op) : op_(op), info_(0) {} | |
| 40 Instruction(OP op, unsigned int info) : op_(op), info_(info) {} | |
| 41 | |
| 42 uint32 op_ : 4; // A few bits to store the OP code. | |
| 43 uint32 info_ : 28; // Remaining bits in first word available to subclass. | |
| 44 | |
| 45 private: | |
| 46 DISALLOW_COPY_AND_ASSIGN(Instruction); | |
| 47 }; | |
| 48 | |
| 49 namespace { | |
| 50 | |
| 51 // Sets the current address for the emitting instructions. | |
| 52 class OriginInstruction : public Instruction { | |
| 53 public: | |
| 54 explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {} | |
| 55 RVA origin_rva() const { return rva_; } | |
| 56 private: | |
| 57 RVA rva_; | |
| 58 }; | |
| 59 | |
| 60 // Emits an entire base relocation table. | |
| 61 class MakeRelocsInstruction : public Instruction { | |
| 62 public: | |
| 63 MakeRelocsInstruction() : Instruction(MAKERELOCS) {} | |
| 64 }; | |
| 65 | |
| 66 // Emits a single byte. | |
| 67 class ByteInstruction : public Instruction { | |
| 68 public: | |
| 69 explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {} | |
| 70 uint8 byte_value() const { return info_; } | |
| 71 }; | |
| 72 | |
| 73 // A ABS32 to REL32 instruction emits a reference to a label's address. | |
| 74 class InstructionWithLabel : public Instruction { | |
| 75 public: | |
| 76 InstructionWithLabel(OP op, Label* label) | |
| 77 : Instruction(op, 0), label_(label) { | |
| 78 if (label == NULL) NOTREACHED(); | |
| 79 } | |
| 80 Label* label() const { return label_; } | |
| 81 private: | |
| 82 Label* label_; | |
| 83 }; | |
| 84 | |
| 85 } // namespace | |
| 86 | |
| 87 AssemblyProgram::AssemblyProgram() | |
| 88 : image_base_(0), | |
| 89 byte_instruction_cache_(NULL) { | |
| 90 } | |
| 91 | |
| 92 static void DeleteContainedLabels(const RVAToLabel& labels) { | |
| 93 for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p) | |
| 94 delete p->second; | |
| 95 } | |
| 96 | |
| 97 AssemblyProgram::~AssemblyProgram() { | |
| 98 for (size_t i = 0; i < instructions_.size(); ++i) { | |
| 99 Instruction* instruction = instructions_[i]; | |
| 100 if (instruction->op() != DEFBYTE) // Will be in byte_instruction_cache_. | |
| 101 delete instruction; | |
| 102 } | |
| 103 if (byte_instruction_cache_) { | |
| 104 for (size_t i = 0; i < 256; ++i) | |
| 105 delete byte_instruction_cache_[i]; | |
| 106 delete[] byte_instruction_cache_; | |
| 107 } | |
| 108 DeleteContainedLabels(rel32_labels_); | |
| 109 DeleteContainedLabels(abs32_labels_); | |
| 110 } | |
| 111 | |
| 112 void AssemblyProgram::EmitMakeRelocsInstruction() { | |
| 113 Emit(new MakeRelocsInstruction()); | |
| 114 } | |
| 115 | |
| 116 void AssemblyProgram::EmitOriginInstruction(RVA rva) { | |
| 117 Emit(new OriginInstruction(rva)); | |
| 118 } | |
| 119 | |
| 120 void AssemblyProgram::EmitByteInstruction(uint8 byte) { | |
| 121 Emit(GetByteInstruction(byte)); | |
| 122 } | |
| 123 | |
| 124 void AssemblyProgram::EmitRel32(Label* label) { | |
| 125 Emit(new InstructionWithLabel(REL32, label)); | |
| 126 } | |
| 127 | |
| 128 void AssemblyProgram::EmitAbs32(Label* label) { | |
| 129 Emit(new InstructionWithLabel(ABS32, label)); | |
| 130 } | |
| 131 | |
| 132 Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) { | |
| 133 return FindLabel(rva, &abs32_labels_); | |
| 134 } | |
| 135 | |
| 136 Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) { | |
| 137 return FindLabel(rva, &rel32_labels_); | |
| 138 } | |
| 139 | |
| 140 void AssemblyProgram::DefaultAssignIndexes() { | |
| 141 DefaultAssignIndexes(&abs32_labels_); | |
| 142 DefaultAssignIndexes(&rel32_labels_); | |
| 143 } | |
| 144 | |
| 145 void AssemblyProgram::UnassignIndexes() { | |
| 146 UnassignIndexes(&abs32_labels_); | |
| 147 UnassignIndexes(&rel32_labels_); | |
| 148 } | |
| 149 | |
| 150 void AssemblyProgram::AssignRemainingIndexes() { | |
| 151 AssignRemainingIndexes(&abs32_labels_); | |
| 152 AssignRemainingIndexes(&rel32_labels_); | |
| 153 } | |
| 154 | |
| 155 Label* AssemblyProgram::InstructionAbs32Label( | |
| 156 const Instruction* instruction) const { | |
| 157 if (instruction->op() == ABS32) | |
| 158 return static_cast<const InstructionWithLabel*>(instruction)->label(); | |
| 159 return NULL; | |
| 160 } | |
| 161 | |
| 162 Label* AssemblyProgram::InstructionRel32Label( | |
| 163 const Instruction* instruction) const { | |
| 164 if (instruction->op() == REL32) | |
| 165 return static_cast<const InstructionWithLabel*>(instruction)->label(); | |
| 166 return NULL; | |
| 167 } | |
| 168 | |
| 169 Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) { | |
| 170 Label*& slot = (*labels)[rva]; | |
| 171 if (slot == 0) { | |
| 172 slot = new Label(rva); | |
| 173 } | |
| 174 return slot; | |
| 175 } | |
| 176 | |
| 177 void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) { | |
| 178 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { | |
| 179 Label* current = p->second; | |
| 180 current->index_ = Label::kNoIndex; | |
| 181 } | |
| 182 } | |
| 183 | |
| 184 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing | |
| 185 // address order. | |
| 186 // | |
| 187 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) { | |
| 188 int index = 0; | |
| 189 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { | |
| 190 Label* current = p->second; | |
| 191 if (current->index_ != Label::kNoIndex) | |
| 192 NOTREACHED(); | |
| 193 current->index_ = index; | |
| 194 ++index; | |
| 195 } | |
| 196 } | |
| 197 | |
| 198 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not | |
| 199 // yet assigned an index. | |
| 200 // | |
| 201 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) { | |
| 202 // An address table compresses best when each index is associated with an | |
| 203 // address that is slight larger than the previous index. | |
| 204 | |
| 205 // First see which indexes have not been used. The 'available' vector could | |
| 206 // grow even bigger, but the number of addresses is a better starting size | |
| 207 // than empty. | |
| 208 std::vector<bool> available(labels->size(), true); | |
| 209 int used = 0; | |
| 210 | |
| 211 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { | |
| 212 size_t index = p->second->index_; | |
| 213 if (index != Label::kNoIndex) { | |
| 214 while (index >= available.size()) | |
| 215 available.push_back(true); | |
| 216 available.at(index) = false; | |
| 217 ++used; | |
| 218 } | |
| 219 } | |
| 220 | |
| 221 LOG(INFO) << used << " of " << labels->size() << " labels pre-assigned"; | |
| 222 | |
| 223 // Are there any unused labels that happen to be adjacent following a used | |
| 224 // label? | |
| 225 // | |
| 226 int fill_forward_count = 0; | |
| 227 Label* prev = 0; | |
| 228 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { | |
| 229 Label* current = p->second; | |
| 230 if (current->index_ == Label::kNoIndex) { | |
| 231 size_t index = 0; | |
| 232 if (prev && prev->index_ != Label::kNoIndex) | |
| 233 index = prev->index_ + 1; | |
| 234 if (index < available.size() && available.at(index)) { | |
| 235 current->index_ = index; | |
| 236 available.at(index) = false; | |
| 237 ++fill_forward_count; | |
| 238 } | |
| 239 } | |
| 240 prev = current; | |
| 241 } | |
| 242 | |
| 243 // Are there any unused labels that happen to be adjacent preceeding a used | |
| 244 // label? | |
| 245 // | |
| 246 int fill_backward_count = 0; | |
| 247 int backward_refs = 0; | |
| 248 prev = 0; | |
| 249 for (RVAToLabel::reverse_iterator p = labels->rbegin(); | |
| 250 p != labels->rend(); | |
| 251 ++p) { | |
| 252 Label* current = p->second; | |
| 253 if (current->index_ == Label::kNoIndex) { | |
| 254 int prev_index; | |
| 255 if (prev) | |
| 256 prev_index = prev->index_; | |
| 257 else | |
| 258 prev_index = available.size(); | |
| 259 if (prev_index != 0 && | |
| 260 prev_index != Label::kNoIndex && | |
| 261 available.at(prev_index - 1)) { | |
| 262 current->index_ = prev_index - 1; | |
| 263 available.at(current->index_) = false; | |
| 264 ++fill_backward_count; | |
| 265 } | |
| 266 } | |
| 267 prev = current; | |
| 268 } | |
| 269 | |
| 270 // Fill in any remaining indexes | |
| 271 int fill_infill_count = 0; | |
| 272 int index = 0; | |
| 273 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { | |
| 274 Label* current = p->second; | |
| 275 if (current->index_ == Label::kNoIndex) { | |
| 276 while (!available.at(index)) { | |
| 277 ++index; | |
| 278 } | |
| 279 current->index_ = index; | |
| 280 available.at(index) = false; | |
| 281 ++index; | |
| 282 ++fill_infill_count; | |
| 283 } | |
| 284 } | |
| 285 | |
| 286 LOG(INFO) << " fill" | |
| 287 << " forward " << fill_forward_count << " " | |
| 288 << " backward " << fill_backward_count << " " | |
| 289 << " infill " << fill_infill_count; | |
| 290 } | |
| 291 | |
| 292 typedef void (EncodedProgram::*DefineLabelMethod)(int index, RVA value); | |
| 293 | |
| 294 static void DefineLabels(const RVAToLabel& labels, | |
| 295 EncodedProgram* encoded_format, | |
| 296 DefineLabelMethod define_label) { | |
| 297 for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p) { | |
| 298 Label* label = p->second; | |
| 299 (encoded_format->*define_label)(label->index_, label->rva_); | |
| 300 } | |
| 301 } | |
| 302 | |
| 303 EncodedProgram* AssemblyProgram::Encode() const { | |
| 304 EncodedProgram* encoded = new EncodedProgram(); | |
| 305 | |
| 306 encoded->set_image_base(image_base_); | |
| 307 DefineLabels(abs32_labels_, encoded, &EncodedProgram::DefineAbs32Label); | |
| 308 DefineLabels(rel32_labels_, encoded, &EncodedProgram::DefineRel32Label); | |
| 309 encoded->EndLabels(); | |
| 310 | |
| 311 for (size_t i = 0; i < instructions_.size(); ++i) { | |
| 312 Instruction* instruction = instructions_[i]; | |
| 313 | |
| 314 switch (instruction->op()) { | |
| 315 case ORIGIN: { | |
| 316 OriginInstruction* org = static_cast<OriginInstruction*>(instruction); | |
| 317 encoded->AddOrigin(org->origin_rva()); | |
| 318 break; | |
| 319 } | |
| 320 case DEFBYTE: { | |
| 321 uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value(); | |
| 322 encoded->AddCopy(1, &b); | |
| 323 break; | |
| 324 } | |
| 325 case REL32: { | |
| 326 Label* label = static_cast<InstructionWithLabel*>(instruction)->label(); | |
| 327 encoded->AddRel32(label->index_); | |
| 328 break; | |
| 329 } | |
| 330 case ABS32: { | |
| 331 Label* label = static_cast<InstructionWithLabel*>(instruction)->label(); | |
| 332 encoded->AddAbs32(label->index_); | |
| 333 break; | |
| 334 } | |
| 335 case MAKERELOCS: { | |
| 336 encoded->AddMakeRelocs(); | |
| 337 break; | |
| 338 } | |
| 339 default: { | |
| 340 NOTREACHED() << "Unknown Insn OP kind"; | |
| 341 } | |
| 342 } | |
| 343 } | |
| 344 | |
| 345 return encoded; | |
| 346 } | |
| 347 | |
| 348 Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) { | |
| 349 if (!byte_instruction_cache_) { | |
| 350 byte_instruction_cache_ = new Instruction*[256]; | |
| 351 for (int i = 0; i < 256; ++i) { | |
| 352 byte_instruction_cache_[i] = new ByteInstruction(static_cast<uint8>(i)); | |
| 353 } | |
| 354 } | |
| 355 | |
| 356 return byte_instruction_cache_[byte]; | |
| 357 } | |
| 358 | |
| 359 //////////////////////////////////////////////////////////////////////////////// | |
| 360 | |
| 361 Status Encode(AssemblyProgram* program, EncodedProgram** output) { | |
| 362 *output = NULL; | |
| 363 EncodedProgram *encoded = program->Encode(); | |
| 364 if (encoded) { | |
| 365 *output = encoded; | |
| 366 return C_OK; | |
| 367 } else { | |
| 368 return C_GENERAL_ERROR; | |
| 369 } | |
| 370 } | |
| 371 | |
| 372 } // namespace courgette | |
| 373 | |
| OLD | NEW |