OLD | NEW |
| (Empty) |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "third_party/courgette/assembly_program.h" | |
6 | |
7 #include <memory.h> | |
8 #include <algorithm> | |
9 #include <map> | |
10 #include <set> | |
11 #include <sstream> | |
12 #include <vector> | |
13 | |
14 #include "base/logging.h" | |
15 | |
16 #include "third_party/courgette/courgette.h" | |
17 #include "third_party/courgette/encoded_program.h" | |
18 | |
19 namespace courgette { | |
20 | |
21 // Opcodes of simple assembly language | |
22 enum OP { | |
23 ORIGIN, // ORIGIN <rva> - set current address for assembly. | |
24 MAKERELOCS, // Generates a base relocation table. | |
25 DEFBYTE, // DEFBYTE <value> - emit a byte literal. | |
26 REL32, // REL32 <label> - emit a rel32 encoded reference to 'label'. | |
27 ABS32, // REL32 <label> - emit am abs32 encoded reference to 'label'. | |
28 LAST_OP | |
29 }; | |
30 | |
31 // Base class for instructions. Because we have so many instructions we want to | |
32 // keep them as small as possible. For this reason we avoid virtual functions. | |
33 // | |
34 class Instruction { | |
35 public: | |
36 OP op() const { return static_cast<OP>(op_); } | |
37 | |
38 protected: | |
39 explicit Instruction(OP op) : op_(op), info_(0) {} | |
40 Instruction(OP op, unsigned int info) : op_(op), info_(info) {} | |
41 | |
42 uint32 op_ : 4; // A few bits to store the OP code. | |
43 uint32 info_ : 28; // Remaining bits in first word available to subclass. | |
44 | |
45 private: | |
46 DISALLOW_COPY_AND_ASSIGN(Instruction); | |
47 }; | |
48 | |
49 namespace { | |
50 | |
51 // Sets the current address for the emitting instructions. | |
52 class OriginInstruction : public Instruction { | |
53 public: | |
54 explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {} | |
55 RVA origin_rva() const { return rva_; } | |
56 private: | |
57 RVA rva_; | |
58 }; | |
59 | |
60 // Emits an entire base relocation table. | |
61 class MakeRelocsInstruction : public Instruction { | |
62 public: | |
63 MakeRelocsInstruction() : Instruction(MAKERELOCS) {} | |
64 }; | |
65 | |
66 // Emits a single byte. | |
67 class ByteInstruction : public Instruction { | |
68 public: | |
69 explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {} | |
70 uint8 byte_value() const { return info_; } | |
71 }; | |
72 | |
73 // A ABS32 to REL32 instruction emits a reference to a label's address. | |
74 class InstructionWithLabel : public Instruction { | |
75 public: | |
76 InstructionWithLabel(OP op, Label* label) | |
77 : Instruction(op, 0), label_(label) { | |
78 if (label == NULL) NOTREACHED(); | |
79 } | |
80 Label* label() const { return label_; } | |
81 private: | |
82 Label* label_; | |
83 }; | |
84 | |
85 } // namespace | |
86 | |
87 AssemblyProgram::AssemblyProgram() | |
88 : image_base_(0), | |
89 byte_instruction_cache_(NULL) { | |
90 } | |
91 | |
92 static void DeleteContainedLabels(const RVAToLabel& labels) { | |
93 for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p) | |
94 delete p->second; | |
95 } | |
96 | |
97 AssemblyProgram::~AssemblyProgram() { | |
98 for (size_t i = 0; i < instructions_.size(); ++i) { | |
99 Instruction* instruction = instructions_[i]; | |
100 if (instruction->op() != DEFBYTE) // Will be in byte_instruction_cache_. | |
101 delete instruction; | |
102 } | |
103 if (byte_instruction_cache_) { | |
104 for (size_t i = 0; i < 256; ++i) | |
105 delete byte_instruction_cache_[i]; | |
106 delete[] byte_instruction_cache_; | |
107 } | |
108 DeleteContainedLabels(rel32_labels_); | |
109 DeleteContainedLabels(abs32_labels_); | |
110 } | |
111 | |
112 void AssemblyProgram::EmitMakeRelocsInstruction() { | |
113 Emit(new MakeRelocsInstruction()); | |
114 } | |
115 | |
116 void AssemblyProgram::EmitOriginInstruction(RVA rva) { | |
117 Emit(new OriginInstruction(rva)); | |
118 } | |
119 | |
120 void AssemblyProgram::EmitByteInstruction(uint8 byte) { | |
121 Emit(GetByteInstruction(byte)); | |
122 } | |
123 | |
124 void AssemblyProgram::EmitRel32(Label* label) { | |
125 Emit(new InstructionWithLabel(REL32, label)); | |
126 } | |
127 | |
128 void AssemblyProgram::EmitAbs32(Label* label) { | |
129 Emit(new InstructionWithLabel(ABS32, label)); | |
130 } | |
131 | |
132 Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) { | |
133 return FindLabel(rva, &abs32_labels_); | |
134 } | |
135 | |
136 Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) { | |
137 return FindLabel(rva, &rel32_labels_); | |
138 } | |
139 | |
140 void AssemblyProgram::DefaultAssignIndexes() { | |
141 DefaultAssignIndexes(&abs32_labels_); | |
142 DefaultAssignIndexes(&rel32_labels_); | |
143 } | |
144 | |
145 void AssemblyProgram::UnassignIndexes() { | |
146 UnassignIndexes(&abs32_labels_); | |
147 UnassignIndexes(&rel32_labels_); | |
148 } | |
149 | |
150 void AssemblyProgram::AssignRemainingIndexes() { | |
151 AssignRemainingIndexes(&abs32_labels_); | |
152 AssignRemainingIndexes(&rel32_labels_); | |
153 } | |
154 | |
155 Label* AssemblyProgram::InstructionAbs32Label( | |
156 const Instruction* instruction) const { | |
157 if (instruction->op() == ABS32) | |
158 return static_cast<const InstructionWithLabel*>(instruction)->label(); | |
159 return NULL; | |
160 } | |
161 | |
162 Label* AssemblyProgram::InstructionRel32Label( | |
163 const Instruction* instruction) const { | |
164 if (instruction->op() == REL32) | |
165 return static_cast<const InstructionWithLabel*>(instruction)->label(); | |
166 return NULL; | |
167 } | |
168 | |
169 Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) { | |
170 Label*& slot = (*labels)[rva]; | |
171 if (slot == 0) { | |
172 slot = new Label(rva); | |
173 } | |
174 return slot; | |
175 } | |
176 | |
177 void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) { | |
178 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { | |
179 Label* current = p->second; | |
180 current->index_ = Label::kNoIndex; | |
181 } | |
182 } | |
183 | |
184 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing | |
185 // address order. | |
186 // | |
187 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) { | |
188 int index = 0; | |
189 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { | |
190 Label* current = p->second; | |
191 if (current->index_ != Label::kNoIndex) | |
192 NOTREACHED(); | |
193 current->index_ = index; | |
194 ++index; | |
195 } | |
196 } | |
197 | |
198 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not | |
199 // yet assigned an index. | |
200 // | |
201 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) { | |
202 // An address table compresses best when each index is associated with an | |
203 // address that is slight larger than the previous index. | |
204 | |
205 // First see which indexes have not been used. The 'available' vector could | |
206 // grow even bigger, but the number of addresses is a better starting size | |
207 // than empty. | |
208 std::vector<bool> available(labels->size(), true); | |
209 int used = 0; | |
210 | |
211 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { | |
212 size_t index = p->second->index_; | |
213 if (index != Label::kNoIndex) { | |
214 while (index >= available.size()) | |
215 available.push_back(true); | |
216 available.at(index) = false; | |
217 ++used; | |
218 } | |
219 } | |
220 | |
221 LOG(INFO) << used << " of " << labels->size() << " labels pre-assigned"; | |
222 | |
223 // Are there any unused labels that happen to be adjacent following a used | |
224 // label? | |
225 // | |
226 int fill_forward_count = 0; | |
227 Label* prev = 0; | |
228 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { | |
229 Label* current = p->second; | |
230 if (current->index_ == Label::kNoIndex) { | |
231 size_t index = 0; | |
232 if (prev && prev->index_ != Label::kNoIndex) | |
233 index = prev->index_ + 1; | |
234 if (index < available.size() && available.at(index)) { | |
235 current->index_ = index; | |
236 available.at(index) = false; | |
237 ++fill_forward_count; | |
238 } | |
239 } | |
240 prev = current; | |
241 } | |
242 | |
243 // Are there any unused labels that happen to be adjacent preceeding a used | |
244 // label? | |
245 // | |
246 int fill_backward_count = 0; | |
247 int backward_refs = 0; | |
248 prev = 0; | |
249 for (RVAToLabel::reverse_iterator p = labels->rbegin(); | |
250 p != labels->rend(); | |
251 ++p) { | |
252 Label* current = p->second; | |
253 if (current->index_ == Label::kNoIndex) { | |
254 int prev_index; | |
255 if (prev) | |
256 prev_index = prev->index_; | |
257 else | |
258 prev_index = available.size(); | |
259 if (prev_index != 0 && | |
260 prev_index != Label::kNoIndex && | |
261 available.at(prev_index - 1)) { | |
262 current->index_ = prev_index - 1; | |
263 available.at(current->index_) = false; | |
264 ++fill_backward_count; | |
265 } | |
266 } | |
267 prev = current; | |
268 } | |
269 | |
270 // Fill in any remaining indexes | |
271 int fill_infill_count = 0; | |
272 int index = 0; | |
273 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { | |
274 Label* current = p->second; | |
275 if (current->index_ == Label::kNoIndex) { | |
276 while (!available.at(index)) { | |
277 ++index; | |
278 } | |
279 current->index_ = index; | |
280 available.at(index) = false; | |
281 ++index; | |
282 ++fill_infill_count; | |
283 } | |
284 } | |
285 | |
286 LOG(INFO) << " fill" | |
287 << " forward " << fill_forward_count << " " | |
288 << " backward " << fill_backward_count << " " | |
289 << " infill " << fill_infill_count; | |
290 } | |
291 | |
292 typedef void (EncodedProgram::*DefineLabelMethod)(int index, RVA value); | |
293 | |
294 static void DefineLabels(const RVAToLabel& labels, | |
295 EncodedProgram* encoded_format, | |
296 DefineLabelMethod define_label) { | |
297 for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p) { | |
298 Label* label = p->second; | |
299 (encoded_format->*define_label)(label->index_, label->rva_); | |
300 } | |
301 } | |
302 | |
303 EncodedProgram* AssemblyProgram::Encode() const { | |
304 EncodedProgram* encoded = new EncodedProgram(); | |
305 | |
306 encoded->set_image_base(image_base_); | |
307 DefineLabels(abs32_labels_, encoded, &EncodedProgram::DefineAbs32Label); | |
308 DefineLabels(rel32_labels_, encoded, &EncodedProgram::DefineRel32Label); | |
309 encoded->EndLabels(); | |
310 | |
311 for (size_t i = 0; i < instructions_.size(); ++i) { | |
312 Instruction* instruction = instructions_[i]; | |
313 | |
314 switch (instruction->op()) { | |
315 case ORIGIN: { | |
316 OriginInstruction* org = static_cast<OriginInstruction*>(instruction); | |
317 encoded->AddOrigin(org->origin_rva()); | |
318 break; | |
319 } | |
320 case DEFBYTE: { | |
321 uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value(); | |
322 encoded->AddCopy(1, &b); | |
323 break; | |
324 } | |
325 case REL32: { | |
326 Label* label = static_cast<InstructionWithLabel*>(instruction)->label(); | |
327 encoded->AddRel32(label->index_); | |
328 break; | |
329 } | |
330 case ABS32: { | |
331 Label* label = static_cast<InstructionWithLabel*>(instruction)->label(); | |
332 encoded->AddAbs32(label->index_); | |
333 break; | |
334 } | |
335 case MAKERELOCS: { | |
336 encoded->AddMakeRelocs(); | |
337 break; | |
338 } | |
339 default: { | |
340 NOTREACHED() << "Unknown Insn OP kind"; | |
341 } | |
342 } | |
343 } | |
344 | |
345 return encoded; | |
346 } | |
347 | |
348 Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) { | |
349 if (!byte_instruction_cache_) { | |
350 byte_instruction_cache_ = new Instruction*[256]; | |
351 for (int i = 0; i < 256; ++i) { | |
352 byte_instruction_cache_[i] = new ByteInstruction(static_cast<uint8>(i)); | |
353 } | |
354 } | |
355 | |
356 return byte_instruction_cache_[byte]; | |
357 } | |
358 | |
359 //////////////////////////////////////////////////////////////////////////////// | |
360 | |
361 Status Encode(AssemblyProgram* program, EncodedProgram** output) { | |
362 *output = NULL; | |
363 EncodedProgram *encoded = program->Encode(); | |
364 if (encoded) { | |
365 *output = encoded; | |
366 return C_OK; | |
367 } else { | |
368 return C_GENERAL_ERROR; | |
369 } | |
370 } | |
371 | |
372 } // namespace courgette | |
373 | |
OLD | NEW |