OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COURGETTE_ASSEMBLY_PROGRAM_H_ | 5 #ifndef COURGETTE_ASSEMBLY_PROGRAM_H_ |
6 #define COURGETTE_ASSEMBLY_PROGRAM_H_ | 6 #define COURGETTE_ASSEMBLY_PROGRAM_H_ |
7 | 7 |
8 #include <stddef.h> | |
9 #include <stdint.h> | 8 #include <stdint.h> |
10 | 9 |
11 #include <memory> | |
12 #include <vector> | 10 #include <vector> |
13 | 11 |
14 #include "base/callback_forward.h" | |
15 #include "base/macros.h" | 12 #include "base/macros.h" |
16 #include "base/memory/free_deleter.h" | |
17 #include "courgette/courgette.h" | 13 #include "courgette/courgette.h" |
18 #include "courgette/image_utils.h" | 14 #include "courgette/image_utils.h" |
19 #include "courgette/instruction_utils.h" | 15 #include "courgette/instruction_utils.h" |
20 #include "courgette/label_manager.h" | 16 #include "courgette/label_manager.h" |
21 #include "courgette/memory_allocator.h" | 17 #include "courgette/memory_allocator.h" // For CheckBool. |
22 | 18 |
23 namespace courgette { | 19 namespace courgette { |
24 | 20 |
25 class EncodedProgram; | 21 class EncodedProgram; |
26 | 22 |
27 // Opcodes of simple assembly language | 23 // An AssemblyProgram stores Labels extracted from an executable file, and |
28 enum OP { | 24 // (optionally) Label annotations. It is initialized by a Disassembler, but |
29 ORIGIN, // ORIGIN <rva> - set current address for assembly. | 25 // stores separate state so that the Disassembler can be deleted. Typical usage: |
30 MAKEPERELOCS, // Generates a base relocation table. | |
31 MAKEELFRELOCS, // Generates a base relocation table. | |
32 DEFBYTE, // DEFBYTE <value> - emit a byte literal. | |
33 REL32, // REL32 <label> - emit a rel32 encoded reference to 'label'. | |
34 ABS32, // ABS32 <label> - emit an abs32 encoded reference to 'label'. | |
35 REL32ARM, // REL32ARM <c_op> <label> - arm-specific rel32 reference | |
36 MAKEELFARMRELOCS, // Generates a base relocation table. | |
37 DEFBYTES, // Emits any number of byte literals | |
38 ABS64, // ABS64 <label> - emit an abs64 encoded reference to 'label'. | |
39 LAST_OP | |
40 }; | |
41 | |
42 // Base class for instructions. Because we have so many instructions we want to | |
43 // keep them as small as possible. For this reason we avoid virtual functions. | |
44 class Instruction { | |
45 public: | |
46 OP op() const { return static_cast<OP>(op_); } | |
47 | |
48 protected: | |
49 explicit Instruction(OP op) : op_(op), info_(0) {} | |
50 Instruction(OP op, unsigned int info) : op_(op), info_(info) {} | |
51 | |
52 uint32_t op_ : 4; // A few bits to store the OP code. | |
53 uint32_t info_ : 28; // Remaining bits in first word available to subclass. | |
54 | |
55 private: | |
56 DISALLOW_COPY_AND_ASSIGN(Instruction); | |
57 }; | |
58 | |
59 // An AssemblyProgram is the result of disassembling an executable file. | |
60 // | 26 // |
61 // * The disassembler creates labels in the AssemblyProgram and emits | 27 // * The Disassembler calls PrecomputeLabels() and injects RVAs for abs32/rel32 |
62 // 'Instructions'. | 28 // references. These are used to initialize labels. |
63 // * The disassembler then calls DefaultAssignIndexes to assign | 29 // * The Disassembler calls DefaultAssignIndexes() to assign addresses to |
64 // addresses to positions in the address tables. | 30 // positions in the address tables. |
65 // * [Optional step] | 31 // * [Optional step] |
66 // * At this point the AssemblyProgram can be converted into an | 32 // * The Disassembler can use Labels in AssemblyProgram to convert the |
67 // EncodedProgram and serialized to an output stream. | 33 // executable file to an EncodedProgram, serialized to an output stream. |
68 // * Later, the EncodedProgram can be deserialized and assembled into | 34 // * Later, the EncodedProgram can be deserialized and assembled into the |
69 // the original file. | 35 // original executable file. |
70 // | 36 // |
71 // The optional step is to modify the AssemblyProgram. One form of modification | 37 // The optional step is to adjust Labels in the AssemblyProgram. One form of |
72 // is to assign indexes in such a way as to make the EncodedProgram for this | 38 // adjustment is to assign indexes in such a way as to make the EncodedProgram |
73 // AssemblyProgram look more like the EncodedProgram for some other | 39 // for an executable look more like the EncodedProgram for another executable. |
74 // AssemblyProgram. The modification process should call UnassignIndexes, do | 40 // The adjustment process should call UnassignIndexes(), do its own assignment, |
75 // its own assignment, and then call AssignRemainingIndexes to ensure all | 41 // and then call AssignRemainingIndexes() to ensure all indexes are assigned. |
76 // indexes are assigned. | |
77 | |
78 class AssemblyProgram { | 42 class AssemblyProgram { |
79 public: | 43 public: |
80 using LabelHandler = base::Callback<void(Label*)>; | |
81 | |
82 AssemblyProgram(ExecutableType kind, uint64_t image_base); | 44 AssemblyProgram(ExecutableType kind, uint64_t image_base); |
83 ~AssemblyProgram(); | 45 ~AssemblyProgram(); |
84 | 46 |
85 ExecutableType kind() const { return kind_; } | 47 ExecutableType kind() const { return kind_; } |
86 const std::vector<Label*>& abs32_label_annotations() const { | 48 const std::vector<Label*>& abs32_label_annotations() const { |
87 return abs32_label_annotations_; | 49 return abs32_label_annotations_; |
88 } | 50 } |
89 const std::vector<Label*>& rel32_label_annotations() const { | 51 const std::vector<Label*>& rel32_label_annotations() const { |
90 return rel32_label_annotations_; | 52 return rel32_label_annotations_; |
91 } | 53 } |
92 std::vector<Label*>* mutable_abs32_label_annotations() { | |
93 return &abs32_label_annotations_; | |
94 } | |
95 std::vector<Label*>* mutable_rel32_label_annotations() { | |
96 return &rel32_label_annotations_; | |
97 } | |
98 | 54 |
99 // Traverses RVAs in |abs32_visitor| and |rel32_visitor| to precompute Labels. | 55 // Traverses RVAs in |abs32_visitor| and |rel32_visitor| to precompute Labels. |
100 void PrecomputeLabels(RvaVisitor* abs32_visitor, RvaVisitor* rel32_visitor); | 56 void PrecomputeLabels(RvaVisitor* abs32_visitor, RvaVisitor* rel32_visitor); |
101 | 57 |
102 // Removes underused Labels. Thresholds used (0 = no trimming) are | 58 // Removes underused Labels. Thresholds used (0 = no trimming) are |
103 // architecture-dependent. | 59 // architecture-dependent. |
104 void TrimLabels(); | 60 void TrimLabels(); |
105 | 61 |
106 void UnassignIndexes(); | 62 void UnassignIndexes(); |
107 void DefaultAssignIndexes(); | 63 void DefaultAssignIndexes(); |
108 void AssignRemainingIndexes(); | 64 void AssignRemainingIndexes(); |
109 | 65 |
110 // Looks up abs32 label. Returns null if none found. | 66 // Looks up abs32 label. Returns null if none found. |
111 Label* FindAbs32Label(RVA rva); | 67 Label* FindAbs32Label(RVA rva); |
112 | 68 |
113 // Looks up rel32 label. Returns null if none found. | 69 // Looks up rel32 label. Returns null if none found. |
114 Label* FindRel32Label(RVA rva); | 70 Label* FindRel32Label(RVA rva); |
115 | 71 |
116 // Calls |gen| in 2 passes to emit instructions. In pass 1 we provide a | 72 // Uses |gen| to initialize |*_label_annotations_|. |
117 // receptor to count space requirement. In pass 2 we provide a receptor to | 73 CheckBool AnnotateLabels(const InstructionGenerator& gen); |
118 // store instructions. If |annotate_labels| is true, then extracts Label | |
119 // annotations into |*_label_annotations_|. | |
120 CheckBool GenerateInstructions(const InstructionGenerator& gen, | |
121 bool annotate_labels); | |
122 | 74 |
123 // Returns an EncodedProgram that converts program to encoded form. | 75 // Initializes |encoded| by injecting basic data and Label data. |
124 std::unique_ptr<EncodedProgram> Encode() const; | 76 bool PrepareEncodedProgram(EncodedProgram* encoded) const; |
125 | |
126 // TODO(huangs): Implement these in InstructionStoreReceptor. | |
127 // Instructions will be assembled in the order they are emitted. | |
128 | |
129 // Generates an entire base relocation table. | |
130 CheckBool EmitPeRelocs() WARN_UNUSED_RESULT; | |
131 | |
132 // Generates an ELF style relocation table for X86. | |
133 CheckBool EmitElfRelocation() WARN_UNUSED_RESULT; | |
134 | |
135 // Generates an ELF style relocation table for ARM. | |
136 CheckBool EmitElfARMRelocation() WARN_UNUSED_RESULT; | |
137 | |
138 // Following instruction will be assembled at address 'rva'. | |
139 CheckBool EmitOrigin(RVA rva) WARN_UNUSED_RESULT; | |
140 | |
141 // Generates a single byte of data or machine instruction. | |
142 CheckBool EmitSingleByte(uint8_t byte) WARN_UNUSED_RESULT; | |
143 | |
144 // Generates multiple bytes of data or machine instructions. | |
145 CheckBool EmitMultipleBytes(const uint8_t* bytes, | |
146 size_t len) WARN_UNUSED_RESULT; | |
147 | |
148 // Generates a 4-byte relative reference to address of 'label'. | |
149 CheckBool EmitRel32(Label* label) WARN_UNUSED_RESULT; | |
150 | |
151 // Generates a 4-byte relative reference to address of 'label' for ARM. | |
152 CheckBool EmitRel32ARM(uint16_t op, | |
153 Label* label, | |
154 const uint8_t* arm_op, | |
155 uint16_t op_size) WARN_UNUSED_RESULT; | |
156 | |
157 // Generates a 4-byte absolute reference to address of 'label'. | |
158 CheckBool EmitAbs32(Label* label) WARN_UNUSED_RESULT; | |
159 | |
160 // Generates an 8-byte absolute reference to address of 'label'. | |
161 CheckBool EmitAbs64(Label* label) WARN_UNUSED_RESULT; | |
162 | 77 |
163 private: | 78 private: |
164 using InstructionVector = NoThrowBuffer<Instruction*>; | |
165 | |
166 using ScopedInstruction = | |
167 std::unique_ptr<Instruction, UncheckedDeleter<Instruction>>; | |
168 | |
169 CheckBool Emit(ScopedInstruction instruction) WARN_UNUSED_RESULT; | |
170 CheckBool EmitShared(Instruction* instruction) WARN_UNUSED_RESULT; | |
171 | |
172 static const int kLabelLowerLimit; | 79 static const int kLabelLowerLimit; |
173 | 80 |
174 // Looks up a label or creates a new one. Might return NULL. | 81 // Looks up a label or creates a new one. Might return NULL. |
175 Label* FindLabel(RVA rva, RVAToLabel* labels); | 82 Label* FindLabel(RVA rva, RVAToLabel* labels); |
176 | 83 |
177 // Sharing instructions that emit a single byte saves a lot of space. | |
178 Instruction* GetByteInstruction(uint8_t byte); | |
179 | |
180 const ExecutableType kind_; | 84 const ExecutableType kind_; |
181 const uint64_t image_base_; // Desired or mandated base address of image. | 85 const uint64_t image_base_; // Desired or mandated base address of image. |
182 | 86 |
183 std::unique_ptr<Instruction* [], base::FreeDeleter> byte_instruction_cache_; | |
184 | |
185 InstructionVector instructions_; // All the instructions in program. | |
186 | |
187 // Storage and lookup of Labels associated with target addresses. We use | 87 // Storage and lookup of Labels associated with target addresses. We use |
188 // separate abs32 and rel32 labels. | 88 // separate abs32 and rel32 labels. |
189 LabelManager abs32_label_manager_; | 89 LabelManager abs32_label_manager_; |
190 LabelManager rel32_label_manager_; | 90 LabelManager rel32_label_manager_; |
191 | 91 |
192 // Label pointers for each abs32 and rel32 location, sorted by file offset. | 92 // Label pointers for each abs32 and rel32 location, sorted by file offset. |
193 // These are used by Label adjustment during patch generation. | 93 // These are used by Label adjustment during patch generation. |
194 std::vector<Label*> abs32_label_annotations_; | 94 std::vector<Label*> abs32_label_annotations_; |
195 std::vector<Label*> rel32_label_annotations_; | 95 std::vector<Label*> rel32_label_annotations_; |
196 | 96 |
197 DISALLOW_COPY_AND_ASSIGN(AssemblyProgram); | 97 DISALLOW_COPY_AND_ASSIGN(AssemblyProgram); |
198 }; | 98 }; |
199 | 99 |
200 // Converts |program| into encoded form, returning it as |*output|. | |
201 // Returns C_OK if succeeded, otherwise returns an error status and sets | |
202 // |*output| to null. | |
203 Status Encode(const AssemblyProgram& program, | |
204 std::unique_ptr<EncodedProgram>* output); | |
205 | |
206 } // namespace courgette | 100 } // namespace courgette |
207 | 101 |
208 #endif // COURGETTE_ASSEMBLY_PROGRAM_H_ | 102 #endif // COURGETTE_ASSEMBLY_PROGRAM_H_ |
OLD | NEW |