mi_exe_stub/x86_encoder/bcj2_encoder.cc - Issue 624713003: Keep only base/extractor.[cc|h].

Side by Side Diff: mi_exe_stub/x86_encoder/bcj2_encoder.cc

Issue 624713003: Keep only base/extractor.[cc|h]. (Closed) Base URL: https://chromium.googlesource.com/external/omaha.git@master

Patch Set: Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 // Copyright 2009 Google Inc.

2 //

3 // Licensed under the Apache License, Version 2.0 (the "License");

4 // you may not use this file except in compliance with the License.

5 // You may obtain a copy of the License at

6 //

7 // http://www.apache.org/licenses/LICENSE-2.0

8 //

9 // Unless required by applicable law or agreed to in writing, software

10 // distributed under the License is distributed on an "AS IS" BASIS,

11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12 // See the License for the specific language governing permissions and

13 // limitations under the License.

14 // ========================================================================

15 //

16 // The BCJ2 algorithm takes advantage of the fact that a lot of relative jumps

17 // in x86 code are to the same address. It essentially performs the moral

18 // equivalent of the following conversion:

19 // ...

20 // JMP 8 bytes back

21 // ...

22 // JMP 28 bytes back

23 // ...

24 // JMP 40 bytes back

25 // ...

26 // to:

27 // ...

28 // JMP 0x1000

29 // ...

30 // JMP 0x1000

31 // ...

32 // JMP 0x1000

33 // ...

34 //

35 // The second form has a lot more repetition, and standard entropy coding can

36 // compress it further.

37 //

38 // Details:

39 // TODO(omaha): figure out what the byte before a CALL (0xE8) instruction means.

40 // TODO(omaha): document exactly how the range encoding is used. There are 258

41 // range encoding bits. The first 256 are used for CALL instructions based on

42 // the value of the previous byte; byte 256 is used for JMP, and byte 257 is

43 // used for JCC.

44 //

45 // BCJ2 converts the targets for the CALL (0xE8), JMP (0xE9), and certain JCC

46 // (0xF80-0xF8F) instructions into absolute jumps. This is not a full x86 op

47 // code interpreter, and it is almost certain that bytes that are data rather

48 // than instructions will be encoded.

49 // The algorithm uses the following steps to perform the conversion:

50 // 1. Iterate through each byte while the current position is at least 5 bytes

51 // away from the end.

52 // 1. If the byte is not a CALL , JMP, or JCC instruction, copy the byte to the

53 // output and go back to step 1.

54 // 2. Otherwise, calculate the target of the jump. If the target of this jump is

55 // not within this file (e.g. past the end of the input), we do not rewrite

56 // this jump: write 0 to the range encoder to indicate the target was not

57 // processed and go back to step 1.

58 // 3. If the instruction is CALL, write the absolute target to the call output

59 // stream. Otherwise, write the absolute target to the jump output stream.

60 // Write a 1 to the range encoder to indicate the target was processed and go

61 // back to step 1.

62 // 4. Once within 5 bytes of the end of the input, flush out the remaining

63 // bytes. If one of the bytes happens to be one of the op codes that should

64 // be handled, write a 0 to the range encoder to indicate that it was not

65 // processed.

66

67 #include "omaha/mi_exe_stub/x86_encoder/bcj2_encoder.h"

68

69 #include "base/basictypes.h"

70 #include "omaha/mi_exe_stub/x86_encoder/range_encoder.h"

71

72 namespace omaha {

73

74 namespace {

75

76 bool IsJcc(uint8 byte0, uint8 byte1) {

77 return (byte0 == 0x0F && (byte1 & 0xF0) == 0x80);

78 }

79

80 bool IsJ(uint8 byte0, uint8 byte1) {

81 return ((byte1 & 0xFE) == 0xE8 \|\| IsJcc(byte0, byte1));

82 }

83

84 int GetIndex(uint8 byte0, uint8 byte1) {

85 return ((byte1 == 0xE8) ? byte0 : ((byte1 == 0xE9) ? 256 : 257));

86 }

87

88 } // namespace

89

90 bool Bcj2Encode(const std::string& input,

91 std::string* main_output,

92 std::string* call_output,

93 std::string* jump_output,

94 std::string* misc_output) {

95 if (!main_output \|\| !call_output \|\| !jump_output \|\| !misc_output) {

96 return false;

97 }

98

99 size_t input_position = 0;

100

101 static const int kNumberOfMoveBits = 5;

102 RangeEncoder range_encoder(misc_output);

103 RangeEncoderBit<kNumberOfMoveBits> status_encoder[256 + 2];

104

105 uint8 previous_byte = 0;

106

107 while (true) {

108 if (input.size() - input_position < 5) {

109 for (; input_position < input.size(); ++input_position) {

110 uint8 byte = input[input_position];

111 *main_output += byte;

112

113 size_t index;

114 if (0xE8 == byte) {

115 index = previous_byte;

116 } else if (0xE9 == byte) {

117 index = 256;

118 } else if (IsJcc(previous_byte, byte)) {

119 index = 257;

120 } else {

121 previous_byte = byte;

122 continue;

123 }

124 status_encoder[index].Encode(0, &range_encoder);

125 previous_byte = byte;

126 }

127

128 range_encoder.Flush();

129 return true;

130 }

131

132 while (input_position <= input.size() - 5) {

133 uint8 byte = input[input_position];

134 *main_output += byte;

135

136 if (!IsJ(previous_byte, byte)) {

137 input_position++;

138 previous_byte = byte;

139 continue;

140 }

141

142 uint8 next_byte = input[input_position + 4];

143 uint32 src =

144 static_cast<uint32>(next_byte) << 24 \|

145 static_cast<uint32>(input[input_position + 3]) << 16 \|

146 static_cast<uint32>(input[input_position + 2]) << 8 \|

147 input[input_position + 1];

148 uint32 dst = input_position + src + 5;

149

150 uint32 index = GetIndex(previous_byte, byte);

151 if (dst < input.size()) {

152 status_encoder[index].Encode(1, &range_encoder);

153 input_position += 5;

154 std::string* s = (byte == 0xE8) ? call_output : jump_output;

155 for (int i = 24; i >= 0; i -= 8) {

156 *s += static_cast<uint8>(dst >> i);

157 }

158 previous_byte = next_byte;

159 } else {

160 status_encoder[index].Encode(0, &range_encoder);

161 input_position++;

162 previous_byte = byte;

163 }

164 }

165 }

166 }

167

168 } // namespace omaha

OLD	NEW

« no previous file with comments | « mi_exe_stub/x86_encoder/bcj2_encoder.h ('k') | mi_exe_stub/x86_encoder/bcj2_encoder_unittest.cc » ('j') | no next file with comments »