Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(486)

Side by Side Diff: mi_exe_stub/x86_encoder/bcj2_encoder.cc

Issue 624713003: Keep only base/extractor.[cc|h]. (Closed) Base URL: https://chromium.googlesource.com/external/omaha.git@master
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2009 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 // ========================================================================
15 //
16 // The BCJ2 algorithm takes advantage of the fact that a lot of relative jumps
17 // in x86 code are to the same address. It essentially performs the moral
18 // equivalent of the following conversion:
19 // ...
20 // JMP 8 bytes back
21 // ...
22 // JMP 28 bytes back
23 // ...
24 // JMP 40 bytes back
25 // ...
26 // to:
27 // ...
28 // JMP 0x1000
29 // ...
30 // JMP 0x1000
31 // ...
32 // JMP 0x1000
33 // ...
34 //
35 // The second form has a lot more repetition, and standard entropy coding can
36 // compress it further.
37 //
38 // Details:
39 // TODO(omaha): figure out what the byte before a CALL (0xE8) instruction means.
40 // TODO(omaha): document exactly how the range encoding is used. There are 258
41 // range encoding bits. The first 256 are used for CALL instructions based on
42 // the value of the previous byte; byte 256 is used for JMP, and byte 257 is
43 // used for JCC.
44 //
45 // BCJ2 converts the targets for the CALL (0xE8), JMP (0xE9), and certain JCC
46 // (0xF80-0xF8F) instructions into absolute jumps. This is not a full x86 op
47 // code interpreter, and it is almost certain that bytes that are data rather
48 // than instructions will be encoded.
49 // The algorithm uses the following steps to perform the conversion:
50 // 1. Iterate through each byte while the current position is at least 5 bytes
51 // away from the end.
52 // 1. If the byte is not a CALL , JMP, or JCC instruction, copy the byte to the
53 // output and go back to step 1.
54 // 2. Otherwise, calculate the target of the jump. If the target of this jump is
55 // not within this file (e.g. past the end of the input), we do not rewrite
56 // this jump: write 0 to the range encoder to indicate the target was not
57 // processed and go back to step 1.
58 // 3. If the instruction is CALL, write the absolute target to the call output
59 // stream. Otherwise, write the absolute target to the jump output stream.
60 // Write a 1 to the range encoder to indicate the target was processed and go
61 // back to step 1.
62 // 4. Once within 5 bytes of the end of the input, flush out the remaining
63 // bytes. If one of the bytes happens to be one of the op codes that should
64 // be handled, write a 0 to the range encoder to indicate that it was not
65 // processed.
66
67 #include "omaha/mi_exe_stub/x86_encoder/bcj2_encoder.h"
68
69 #include "base/basictypes.h"
70 #include "omaha/mi_exe_stub/x86_encoder/range_encoder.h"
71
72 namespace omaha {
73
74 namespace {
75
76 bool IsJcc(uint8 byte0, uint8 byte1) {
77 return (byte0 == 0x0F && (byte1 & 0xF0) == 0x80);
78 }
79
80 bool IsJ(uint8 byte0, uint8 byte1) {
81 return ((byte1 & 0xFE) == 0xE8 || IsJcc(byte0, byte1));
82 }
83
84 int GetIndex(uint8 byte0, uint8 byte1) {
85 return ((byte1 == 0xE8) ? byte0 : ((byte1 == 0xE9) ? 256 : 257));
86 }
87
88 } // namespace
89
90 bool Bcj2Encode(const std::string& input,
91 std::string* main_output,
92 std::string* call_output,
93 std::string* jump_output,
94 std::string* misc_output) {
95 if (!main_output || !call_output || !jump_output || !misc_output) {
96 return false;
97 }
98
99 size_t input_position = 0;
100
101 static const int kNumberOfMoveBits = 5;
102 RangeEncoder range_encoder(misc_output);
103 RangeEncoderBit<kNumberOfMoveBits> status_encoder[256 + 2];
104
105 uint8 previous_byte = 0;
106
107 while (true) {
108 if (input.size() - input_position < 5) {
109 for (; input_position < input.size(); ++input_position) {
110 uint8 byte = input[input_position];
111 *main_output += byte;
112
113 size_t index;
114 if (0xE8 == byte) {
115 index = previous_byte;
116 } else if (0xE9 == byte) {
117 index = 256;
118 } else if (IsJcc(previous_byte, byte)) {
119 index = 257;
120 } else {
121 previous_byte = byte;
122 continue;
123 }
124 status_encoder[index].Encode(0, &range_encoder);
125 previous_byte = byte;
126 }
127
128 range_encoder.Flush();
129 return true;
130 }
131
132 while (input_position <= input.size() - 5) {
133 uint8 byte = input[input_position];
134 *main_output += byte;
135
136 if (!IsJ(previous_byte, byte)) {
137 input_position++;
138 previous_byte = byte;
139 continue;
140 }
141
142 uint8 next_byte = input[input_position + 4];
143 uint32 src =
144 static_cast<uint32>(next_byte) << 24 |
145 static_cast<uint32>(input[input_position + 3]) << 16 |
146 static_cast<uint32>(input[input_position + 2]) << 8 |
147 input[input_position + 1];
148 uint32 dst = input_position + src + 5;
149
150 uint32 index = GetIndex(previous_byte, byte);
151 if (dst < input.size()) {
152 status_encoder[index].Encode(1, &range_encoder);
153 input_position += 5;
154 std::string* s = (byte == 0xE8) ? call_output : jump_output;
155 for (int i = 24; i >= 0; i -= 8) {
156 *s += static_cast<uint8>(dst >> i);
157 }
158 previous_byte = next_byte;
159 } else {
160 status_encoder[index].Encode(0, &range_encoder);
161 input_position++;
162 previous_byte = byte;
163 }
164 }
165 }
166 }
167
168 } // namespace omaha
OLDNEW
« no previous file with comments | « mi_exe_stub/x86_encoder/bcj2_encoder.h ('k') | mi_exe_stub/x86_encoder/bcj2_encoder_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698