| OLD | NEW |
| (Empty) |
| 1 // Copyright 2009 Google Inc. | |
| 2 // | |
| 3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 4 // you may not use this file except in compliance with the License. | |
| 5 // You may obtain a copy of the License at | |
| 6 // | |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 8 // | |
| 9 // Unless required by applicable law or agreed to in writing, software | |
| 10 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 12 // See the License for the specific language governing permissions and | |
| 13 // limitations under the License. | |
| 14 // ======================================================================== | |
| 15 // | |
| 16 // The BCJ2 algorithm takes advantage of the fact that a lot of relative jumps | |
| 17 // in x86 code are to the same address. It essentially performs the moral | |
| 18 // equivalent of the following conversion: | |
| 19 // ... | |
| 20 // JMP 8 bytes back | |
| 21 // ... | |
| 22 // JMP 28 bytes back | |
| 23 // ... | |
| 24 // JMP 40 bytes back | |
| 25 // ... | |
| 26 // to: | |
| 27 // ... | |
| 28 // JMP 0x1000 | |
| 29 // ... | |
| 30 // JMP 0x1000 | |
| 31 // ... | |
| 32 // JMP 0x1000 | |
| 33 // ... | |
| 34 // | |
| 35 // The second form has a lot more repetition, and standard entropy coding can | |
| 36 // compress it further. | |
| 37 // | |
| 38 // Details: | |
| 39 // TODO(omaha): figure out what the byte before a CALL (0xE8) instruction means. | |
| 40 // TODO(omaha): document exactly how the range encoding is used. There are 258 | |
| 41 // range encoding bits. The first 256 are used for CALL instructions based on | |
| 42 // the value of the previous byte; byte 256 is used for JMP, and byte 257 is | |
| 43 // used for JCC. | |
| 44 // | |
| 45 // BCJ2 converts the targets for the CALL (0xE8), JMP (0xE9), and certain JCC | |
| 46 // (0xF80-0xF8F) instructions into absolute jumps. This is not a full x86 op | |
| 47 // code interpreter, and it is almost certain that bytes that are data rather | |
| 48 // than instructions will be encoded. | |
| 49 // The algorithm uses the following steps to perform the conversion: | |
| 50 // 1. Iterate through each byte while the current position is at least 5 bytes | |
| 51 // away from the end. | |
| 52 // 1. If the byte is not a CALL , JMP, or JCC instruction, copy the byte to the | |
| 53 // output and go back to step 1. | |
| 54 // 2. Otherwise, calculate the target of the jump. If the target of this jump is | |
| 55 // not within this file (e.g. past the end of the input), we do not rewrite | |
| 56 // this jump: write 0 to the range encoder to indicate the target was not | |
| 57 // processed and go back to step 1. | |
| 58 // 3. If the instruction is CALL, write the absolute target to the call output | |
| 59 // stream. Otherwise, write the absolute target to the jump output stream. | |
| 60 // Write a 1 to the range encoder to indicate the target was processed and go | |
| 61 // back to step 1. | |
| 62 // 4. Once within 5 bytes of the end of the input, flush out the remaining | |
| 63 // bytes. If one of the bytes happens to be one of the op codes that should | |
| 64 // be handled, write a 0 to the range encoder to indicate that it was not | |
| 65 // processed. | |
| 66 | |
| 67 #include "omaha/mi_exe_stub/x86_encoder/bcj2_encoder.h" | |
| 68 | |
| 69 #include "base/basictypes.h" | |
| 70 #include "omaha/mi_exe_stub/x86_encoder/range_encoder.h" | |
| 71 | |
| 72 namespace omaha { | |
| 73 | |
| 74 namespace { | |
| 75 | |
| 76 bool IsJcc(uint8 byte0, uint8 byte1) { | |
| 77 return (byte0 == 0x0F && (byte1 & 0xF0) == 0x80); | |
| 78 } | |
| 79 | |
| 80 bool IsJ(uint8 byte0, uint8 byte1) { | |
| 81 return ((byte1 & 0xFE) == 0xE8 || IsJcc(byte0, byte1)); | |
| 82 } | |
| 83 | |
| 84 int GetIndex(uint8 byte0, uint8 byte1) { | |
| 85 return ((byte1 == 0xE8) ? byte0 : ((byte1 == 0xE9) ? 256 : 257)); | |
| 86 } | |
| 87 | |
| 88 } // namespace | |
| 89 | |
| 90 bool Bcj2Encode(const std::string& input, | |
| 91 std::string* main_output, | |
| 92 std::string* call_output, | |
| 93 std::string* jump_output, | |
| 94 std::string* misc_output) { | |
| 95 if (!main_output || !call_output || !jump_output || !misc_output) { | |
| 96 return false; | |
| 97 } | |
| 98 | |
| 99 size_t input_position = 0; | |
| 100 | |
| 101 static const int kNumberOfMoveBits = 5; | |
| 102 RangeEncoder range_encoder(misc_output); | |
| 103 RangeEncoderBit<kNumberOfMoveBits> status_encoder[256 + 2]; | |
| 104 | |
| 105 uint8 previous_byte = 0; | |
| 106 | |
| 107 while (true) { | |
| 108 if (input.size() - input_position < 5) { | |
| 109 for (; input_position < input.size(); ++input_position) { | |
| 110 uint8 byte = input[input_position]; | |
| 111 *main_output += byte; | |
| 112 | |
| 113 size_t index; | |
| 114 if (0xE8 == byte) { | |
| 115 index = previous_byte; | |
| 116 } else if (0xE9 == byte) { | |
| 117 index = 256; | |
| 118 } else if (IsJcc(previous_byte, byte)) { | |
| 119 index = 257; | |
| 120 } else { | |
| 121 previous_byte = byte; | |
| 122 continue; | |
| 123 } | |
| 124 status_encoder[index].Encode(0, &range_encoder); | |
| 125 previous_byte = byte; | |
| 126 } | |
| 127 | |
| 128 range_encoder.Flush(); | |
| 129 return true; | |
| 130 } | |
| 131 | |
| 132 while (input_position <= input.size() - 5) { | |
| 133 uint8 byte = input[input_position]; | |
| 134 *main_output += byte; | |
| 135 | |
| 136 if (!IsJ(previous_byte, byte)) { | |
| 137 input_position++; | |
| 138 previous_byte = byte; | |
| 139 continue; | |
| 140 } | |
| 141 | |
| 142 uint8 next_byte = input[input_position + 4]; | |
| 143 uint32 src = | |
| 144 static_cast<uint32>(next_byte) << 24 | | |
| 145 static_cast<uint32>(input[input_position + 3]) << 16 | | |
| 146 static_cast<uint32>(input[input_position + 2]) << 8 | | |
| 147 input[input_position + 1]; | |
| 148 uint32 dst = input_position + src + 5; | |
| 149 | |
| 150 uint32 index = GetIndex(previous_byte, byte); | |
| 151 if (dst < input.size()) { | |
| 152 status_encoder[index].Encode(1, &range_encoder); | |
| 153 input_position += 5; | |
| 154 std::string* s = (byte == 0xE8) ? call_output : jump_output; | |
| 155 for (int i = 24; i >= 0; i -= 8) { | |
| 156 *s += static_cast<uint8>(dst >> i); | |
| 157 } | |
| 158 previous_byte = next_byte; | |
| 159 } else { | |
| 160 status_encoder[index].Encode(0, &range_encoder); | |
| 161 input_position++; | |
| 162 previous_byte = byte; | |
| 163 } | |
| 164 } | |
| 165 } | |
| 166 } | |
| 167 | |
| 168 } // namespace omaha | |
| OLD | NEW |