| Index: mi_exe_stub/x86_encoder/bcj2_encoder.cc
|
| diff --git a/mi_exe_stub/x86_encoder/bcj2_encoder.cc b/mi_exe_stub/x86_encoder/bcj2_encoder.cc
|
| deleted file mode 100644
|
| index bce69dbde85b6c5fd7f89fc35327c286ecfe1cd2..0000000000000000000000000000000000000000
|
| --- a/mi_exe_stub/x86_encoder/bcj2_encoder.cc
|
| +++ /dev/null
|
| @@ -1,168 +0,0 @@
|
| -// Copyright 2009 Google Inc.
|
| -//
|
| -// Licensed under the Apache License, Version 2.0 (the "License");
|
| -// you may not use this file except in compliance with the License.
|
| -// You may obtain a copy of the License at
|
| -//
|
| -// http://www.apache.org/licenses/LICENSE-2.0
|
| -//
|
| -// Unless required by applicable law or agreed to in writing, software
|
| -// distributed under the License is distributed on an "AS IS" BASIS,
|
| -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -// See the License for the specific language governing permissions and
|
| -// limitations under the License.
|
| -// ========================================================================
|
| -//
|
| -// The BCJ2 algorithm takes advantage of the fact that a lot of relative jumps
|
| -// in x86 code are to the same address. It essentially performs the moral
|
| -// equivalent of the following conversion:
|
| -// ...
|
| -// JMP 8 bytes back
|
| -// ...
|
| -// JMP 28 bytes back
|
| -// ...
|
| -// JMP 40 bytes back
|
| -// ...
|
| -// to:
|
| -// ...
|
| -// JMP 0x1000
|
| -// ...
|
| -// JMP 0x1000
|
| -// ...
|
| -// JMP 0x1000
|
| -// ...
|
| -//
|
| -// The second form has a lot more repetition, and standard entropy coding can
|
| -// compress it further.
|
| -//
|
| -// Details:
|
| -// TODO(omaha): figure out what the byte before a CALL (0xE8) instruction means.
|
| -// TODO(omaha): document exactly how the range encoding is used. There are 258
|
| -// range encoding bits. The first 256 are used for CALL instructions based on
|
| -// the value of the previous byte; byte 256 is used for JMP, and byte 257 is
|
| -// used for JCC.
|
| -//
|
| -// BCJ2 converts the targets for the CALL (0xE8), JMP (0xE9), and certain JCC
|
| -// (0xF80-0xF8F) instructions into absolute jumps. This is not a full x86 op
|
| -// code interpreter, and it is almost certain that bytes that are data rather
|
| -// than instructions will be encoded.
|
| -// The algorithm uses the following steps to perform the conversion:
|
| -// 1. Iterate through each byte while the current position is at least 5 bytes
|
| -// away from the end.
|
| -// 1. If the byte is not a CALL , JMP, or JCC instruction, copy the byte to the
|
| -// output and go back to step 1.
|
| -// 2. Otherwise, calculate the target of the jump. If the target of this jump is
|
| -// not within this file (e.g. past the end of the input), we do not rewrite
|
| -// this jump: write 0 to the range encoder to indicate the target was not
|
| -// processed and go back to step 1.
|
| -// 3. If the instruction is CALL, write the absolute target to the call output
|
| -// stream. Otherwise, write the absolute target to the jump output stream.
|
| -// Write a 1 to the range encoder to indicate the target was processed and go
|
| -// back to step 1.
|
| -// 4. Once within 5 bytes of the end of the input, flush out the remaining
|
| -// bytes. If one of the bytes happens to be one of the op codes that should
|
| -// be handled, write a 0 to the range encoder to indicate that it was not
|
| -// processed.
|
| -
|
| -#include "omaha/mi_exe_stub/x86_encoder/bcj2_encoder.h"
|
| -
|
| -#include "base/basictypes.h"
|
| -#include "omaha/mi_exe_stub/x86_encoder/range_encoder.h"
|
| -
|
| -namespace omaha {
|
| -
|
| -namespace {
|
| -
|
| -bool IsJcc(uint8 byte0, uint8 byte1) {
|
| - return (byte0 == 0x0F && (byte1 & 0xF0) == 0x80);
|
| -}
|
| -
|
| -bool IsJ(uint8 byte0, uint8 byte1) {
|
| - return ((byte1 & 0xFE) == 0xE8 || IsJcc(byte0, byte1));
|
| -}
|
| -
|
| -int GetIndex(uint8 byte0, uint8 byte1) {
|
| - return ((byte1 == 0xE8) ? byte0 : ((byte1 == 0xE9) ? 256 : 257));
|
| -}
|
| -
|
| -} // namespace
|
| -
|
| -bool Bcj2Encode(const std::string& input,
|
| - std::string* main_output,
|
| - std::string* call_output,
|
| - std::string* jump_output,
|
| - std::string* misc_output) {
|
| - if (!main_output || !call_output || !jump_output || !misc_output) {
|
| - return false;
|
| - }
|
| -
|
| - size_t input_position = 0;
|
| -
|
| - static const int kNumberOfMoveBits = 5;
|
| - RangeEncoder range_encoder(misc_output);
|
| - RangeEncoderBit<kNumberOfMoveBits> status_encoder[256 + 2];
|
| -
|
| - uint8 previous_byte = 0;
|
| -
|
| - while (true) {
|
| - if (input.size() - input_position < 5) {
|
| - for (; input_position < input.size(); ++input_position) {
|
| - uint8 byte = input[input_position];
|
| - *main_output += byte;
|
| -
|
| - size_t index;
|
| - if (0xE8 == byte) {
|
| - index = previous_byte;
|
| - } else if (0xE9 == byte) {
|
| - index = 256;
|
| - } else if (IsJcc(previous_byte, byte)) {
|
| - index = 257;
|
| - } else {
|
| - previous_byte = byte;
|
| - continue;
|
| - }
|
| - status_encoder[index].Encode(0, &range_encoder);
|
| - previous_byte = byte;
|
| - }
|
| -
|
| - range_encoder.Flush();
|
| - return true;
|
| - }
|
| -
|
| - while (input_position <= input.size() - 5) {
|
| - uint8 byte = input[input_position];
|
| - *main_output += byte;
|
| -
|
| - if (!IsJ(previous_byte, byte)) {
|
| - input_position++;
|
| - previous_byte = byte;
|
| - continue;
|
| - }
|
| -
|
| - uint8 next_byte = input[input_position + 4];
|
| - uint32 src =
|
| - static_cast<uint32>(next_byte) << 24 |
|
| - static_cast<uint32>(input[input_position + 3]) << 16 |
|
| - static_cast<uint32>(input[input_position + 2]) << 8 |
|
| - input[input_position + 1];
|
| - uint32 dst = input_position + src + 5;
|
| -
|
| - uint32 index = GetIndex(previous_byte, byte);
|
| - if (dst < input.size()) {
|
| - status_encoder[index].Encode(1, &range_encoder);
|
| - input_position += 5;
|
| - std::string* s = (byte == 0xE8) ? call_output : jump_output;
|
| - for (int i = 24; i >= 0; i -= 8) {
|
| - *s += static_cast<uint8>(dst >> i);
|
| - }
|
| - previous_byte = next_byte;
|
| - } else {
|
| - status_encoder[index].Encode(0, &range_encoder);
|
| - input_position++;
|
| - previous_byte = byte;
|
| - }
|
| - }
|
| - }
|
| -}
|
| -
|
| -} // namespace omaha
|
|
|