OLD | NEW |
| (Empty) |
1 // Copyright 2009 Google Inc. | |
2 // | |
3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
4 // you may not use this file except in compliance with the License. | |
5 // You may obtain a copy of the License at | |
6 // | |
7 // http://www.apache.org/licenses/LICENSE-2.0 | |
8 // | |
9 // Unless required by applicable law or agreed to in writing, software | |
10 // distributed under the License is distributed on an "AS IS" BASIS, | |
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 // See the License for the specific language governing permissions and | |
13 // limitations under the License. | |
14 // ======================================================================== | |
15 // | |
16 // The BCJ2 algorithm takes advantage of the fact that a lot of relative jumps | |
17 // in x86 code are to the same address. It essentially performs the moral | |
18 // equivalent of the following conversion: | |
19 // ... | |
20 // JMP 8 bytes back | |
21 // ... | |
22 // JMP 28 bytes back | |
23 // ... | |
24 // JMP 40 bytes back | |
25 // ... | |
26 // to: | |
27 // ... | |
28 // JMP 0x1000 | |
29 // ... | |
30 // JMP 0x1000 | |
31 // ... | |
32 // JMP 0x1000 | |
33 // ... | |
34 // | |
35 // The second form has a lot more repetition, and standard entropy coding can | |
36 // compress it further. | |
37 // | |
38 // Details: | |
39 // TODO(omaha): figure out what the byte before a CALL (0xE8) instruction means. | |
40 // TODO(omaha): document exactly how the range encoding is used. There are 258 | |
41 // range encoding bits. The first 256 are used for CALL instructions based on | |
42 // the value of the previous byte; byte 256 is used for JMP, and byte 257 is | |
43 // used for JCC. | |
44 // | |
45 // BCJ2 converts the targets for the CALL (0xE8), JMP (0xE9), and certain JCC | |
46 // (0xF80-0xF8F) instructions into absolute jumps. This is not a full x86 op | |
47 // code interpreter, and it is almost certain that bytes that are data rather | |
48 // than instructions will be encoded. | |
49 // The algorithm uses the following steps to perform the conversion: | |
50 // 1. Iterate through each byte while the current position is at least 5 bytes | |
51 // away from the end. | |
52 // 1. If the byte is not a CALL , JMP, or JCC instruction, copy the byte to the | |
53 // output and go back to step 1. | |
54 // 2. Otherwise, calculate the target of the jump. If the target of this jump is | |
55 // not within this file (e.g. past the end of the input), we do not rewrite | |
56 // this jump: write 0 to the range encoder to indicate the target was not | |
57 // processed and go back to step 1. | |
58 // 3. If the instruction is CALL, write the absolute target to the call output | |
59 // stream. Otherwise, write the absolute target to the jump output stream. | |
60 // Write a 1 to the range encoder to indicate the target was processed and go | |
61 // back to step 1. | |
62 // 4. Once within 5 bytes of the end of the input, flush out the remaining | |
63 // bytes. If one of the bytes happens to be one of the op codes that should | |
64 // be handled, write a 0 to the range encoder to indicate that it was not | |
65 // processed. | |
66 | |
67 #include "omaha/mi_exe_stub/x86_encoder/bcj2_encoder.h" | |
68 | |
69 #include "base/basictypes.h" | |
70 #include "omaha/mi_exe_stub/x86_encoder/range_encoder.h" | |
71 | |
72 namespace omaha { | |
73 | |
74 namespace { | |
75 | |
76 bool IsJcc(uint8 byte0, uint8 byte1) { | |
77 return (byte0 == 0x0F && (byte1 & 0xF0) == 0x80); | |
78 } | |
79 | |
80 bool IsJ(uint8 byte0, uint8 byte1) { | |
81 return ((byte1 & 0xFE) == 0xE8 || IsJcc(byte0, byte1)); | |
82 } | |
83 | |
84 int GetIndex(uint8 byte0, uint8 byte1) { | |
85 return ((byte1 == 0xE8) ? byte0 : ((byte1 == 0xE9) ? 256 : 257)); | |
86 } | |
87 | |
88 } // namespace | |
89 | |
90 bool Bcj2Encode(const std::string& input, | |
91 std::string* main_output, | |
92 std::string* call_output, | |
93 std::string* jump_output, | |
94 std::string* misc_output) { | |
95 if (!main_output || !call_output || !jump_output || !misc_output) { | |
96 return false; | |
97 } | |
98 | |
99 size_t input_position = 0; | |
100 | |
101 static const int kNumberOfMoveBits = 5; | |
102 RangeEncoder range_encoder(misc_output); | |
103 RangeEncoderBit<kNumberOfMoveBits> status_encoder[256 + 2]; | |
104 | |
105 uint8 previous_byte = 0; | |
106 | |
107 while (true) { | |
108 if (input.size() - input_position < 5) { | |
109 for (; input_position < input.size(); ++input_position) { | |
110 uint8 byte = input[input_position]; | |
111 *main_output += byte; | |
112 | |
113 size_t index; | |
114 if (0xE8 == byte) { | |
115 index = previous_byte; | |
116 } else if (0xE9 == byte) { | |
117 index = 256; | |
118 } else if (IsJcc(previous_byte, byte)) { | |
119 index = 257; | |
120 } else { | |
121 previous_byte = byte; | |
122 continue; | |
123 } | |
124 status_encoder[index].Encode(0, &range_encoder); | |
125 previous_byte = byte; | |
126 } | |
127 | |
128 range_encoder.Flush(); | |
129 return true; | |
130 } | |
131 | |
132 while (input_position <= input.size() - 5) { | |
133 uint8 byte = input[input_position]; | |
134 *main_output += byte; | |
135 | |
136 if (!IsJ(previous_byte, byte)) { | |
137 input_position++; | |
138 previous_byte = byte; | |
139 continue; | |
140 } | |
141 | |
142 uint8 next_byte = input[input_position + 4]; | |
143 uint32 src = | |
144 static_cast<uint32>(next_byte) << 24 | | |
145 static_cast<uint32>(input[input_position + 3]) << 16 | | |
146 static_cast<uint32>(input[input_position + 2]) << 8 | | |
147 input[input_position + 1]; | |
148 uint32 dst = input_position + src + 5; | |
149 | |
150 uint32 index = GetIndex(previous_byte, byte); | |
151 if (dst < input.size()) { | |
152 status_encoder[index].Encode(1, &range_encoder); | |
153 input_position += 5; | |
154 std::string* s = (byte == 0xE8) ? call_output : jump_output; | |
155 for (int i = 24; i >= 0; i -= 8) { | |
156 *s += static_cast<uint8>(dst >> i); | |
157 } | |
158 previous_byte = next_byte; | |
159 } else { | |
160 status_encoder[index].Encode(0, &range_encoder); | |
161 input_position++; | |
162 previous_byte = byte; | |
163 } | |
164 } | |
165 } | |
166 } | |
167 | |
168 } // namespace omaha | |
OLD | NEW |