sdch/open_vcdiff/depot/opensource/open-vcdiff/src/google/vcencoder.h - Issue 5203: Transition to pulling open-vcdiff from repository, instead of using snapshot...

Side by Side Diff: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/google/vcencoder.h

Issue 5203: Transition to pulling open-vcdiff from repository, instead of using snapshot... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: Created 12 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « sdch/open_vcdiff/depot/opensource/open-vcdiff/src/google/vcdecoder.h ('k') | sdch/open_vcdiff/depot/opensource/open-vcdiff/src/gtest/README » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 // Copyright 2007 Google Inc.

2 // Author: Lincoln Smith

3 //

4 // Licensed under the Apache License, Version 2.0 (the "License");

5 // you may not use this file except in compliance with the License.

6 // You may obtain a copy of the License at

7 //

8 // http://www.apache.org/licenses/LICENSE-2.0

9 //

10 // Unless required by applicable law or agreed to in writing, software

11 // distributed under the License is distributed on an "AS IS" BASIS,

12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

13 // See the License for the specific language governing permissions and

14 // limitations under the License.

15

16 #ifndef OPEN_VCDIFF_VCENCODER_H_

17 #define OPEN_VCDIFF_VCENCODER_H_

18

19 #include <cstddef> // size_t

20 #include <vector>

21 #include "google/output_string.h"

22

23 namespace open_vcdiff {

24

25 class VCDiffEngine;

26 class VCDiffStreamingEncoderImpl;

27

// Format-extension flags accepted by the VCDiffStreamingEncoder constructor.
// Each flag switches on one open-vcdiff format extension that is not part of
// the RFC 3284 draft standard for VCDIFF.
//
// Because the extensions are non-standard, specify anything other than
// VCD_STANDARD_FORMAT only when the receiver of the data is known to decode
// the delta file with open-vcdiff, or is at least able to interpret these
// extensions.  When extensions are in use, the encoder writes an 'S' as the
// fourth character of the delta file to signal that fact.
//
enum VCDiffFormatExtensionFlagValues {
  // Plain VCDIFF: the encoded format conforms to the RFC draft standard.
  VCD_STANDARD_FORMAT = 0,

  // Interleaved windows: within each delta file window, instructions and
  // sizes are written together with their corresponding addresses and data
  // instead of being split into three separate sections.  This makes it
  // possible to provide partially decoded results when only part of a window
  // has been received (e.g. when HTTP over TCP is the transmission protocol).
  VCD_FORMAT_INTERLEAVED = 1 << 0,

  // Checksummed windows: an Adler32 checksum of the target window data is
  // included in the delta window.
  VCD_FORMAT_CHECKSUM = 1 << 1
};

// Holds VCDiffFormatExtensionFlagValues.  The non-zero flag values occupy
// distinct bits, so interleaved and checksum format can be requested together.
typedef int VCDiffFormatExtensionFlags;

57

58 // A HashedDictionary must be constructed from the dictionary data

59 // in order to use VCDiffStreamingEncoder. If the same dictionary will

60 // be used to perform several encoding operations, then the caller should

61 // create the HashedDictionary once and cache it for reuse. This object

62 // is thread-safe: the same const HashedDictionary can be used

63 // by several threads simultaneously, each with its own VCDiffStreamingEncoder.

64 //

65 // dictionary_contents is copied into the HashedDictionary, so the

66 // caller may free that string, if desired, after the constructor returns.

67 //

68 class HashedDictionary {

69 public:

70 HashedDictionary(const char* dictionary_contents,

71 size_t dictionary_size);

72 ~HashedDictionary();

73

74 // Init() must be called before using the HashedDictionary as an argument

75 // to the VCDiffStreamingEncoder, or for any other purpose except

76 // destruction. It returns true if initialization succeeded, or false

77 // if an error occurred, in which case the caller should destroy the object

78 // without using it.

79 bool Init();

80

81 const VCDiffEngine* engine() const { return engine_; }

82

83 private:

84 const VCDiffEngine* engine_;

85 };

86

87 // The standard streaming interface to the VCDIFF (RFC 3284) encoder.

88 // "Streaming" in this context means that, even though the entire set of

89 // input data to be encoded may not be available at once, the encoder

90 // can produce partial output based on what is available. Of course,

91 // the caller should try to maximize the sizes of the data chunks passed

92 // to the encoder.

93 class VCDiffStreamingEncoder {

94 public:

95 // The HashedDictionary object passed to the constructor must remain valid,

96 // without being deleted, for the lifetime of the VCDiffStreamingEncoder

97 // object.

98 //

99 // format_extensions allows certain open-vcdiff extensions to the VCDIFF

100 // format to be included in the encoded output. These extensions are not

101 // part of the RFC 3284 draft standard, so specifying any extension flags

102 // will make the output compatible only with open-vcdiff, or with other

103 // VCDIFF implementations that accept these extensions. See above for an

104 // explanation of each possible flag value.

105 //

106 // *** look_for_target_matches:

107 // The VCDIFF format allows COPY instruction addresses to reference data from

108 // the source (dictionary), or from previously encoded target data.

109 //

110 // If look_for_target_matches is false, then the encoder will only

111 // produce COPY instructions that reference source data from the dictionary,

112 // never from previously encoded target data. This will speed up the encoding

113 // process, but the encoded data will not be as compact.

114 //

115 // If this value is true, then the encoder will produce COPY instructions

116 // that reference either source data or target data. A COPY instruction from

117 // the previously encoded target data may even extend into the range of the

118 // data being produced by that same COPY instruction; for example, if the

119 // previously encoded target data is "LA", then a single COPY instruction of

120 // length 10 can produce the additional target data "LALALALALA".

121 //

122 // There is a third type of COPY instruction that starts within

123 // the source data and extends from the end of the source data

124 // into the beginning of the target data. This VCDIFF encoder will never

125 // produce a COPY instruction of this third type (regardless of the value of

126 // look_for_target_matches) because the cost of checking for matches

127 // across the source-target boundary would not justify its benefits.

128 //

129 VCDiffStreamingEncoder(const HashedDictionary* dictionary,

130 VCDiffFormatExtensionFlags format_extensions,

131 bool look_for_target_matches);

132 ~VCDiffStreamingEncoder();

133

134 // The client should use these routines as follows:

135 // HashedDictionary hd(dictionary, dictionary_size);

136 // if (!hd.Init()) {

137 // HandleError();

138 // return;

139 // }

140 // string output_string;

141 // VCDiffStreamingEncoder v(hd, false, false);

142 // if (!v.StartEncoding(&output_string)) {

143 // HandleError();

144 // return; // No need to call FinishEncoding()

145 // }

146 // Process(output_string.data(), output_string.size());

147 // output_string.clear();

148 // while (get data_buf) {

149 // if (!v.EncodeChunk(data_buf, data_len, &output_string)) {

150 // HandleError();

151 // return; // No need to call FinishEncoding()

152 // }

153 // // The encoding is appended to output_string at each call,

154 // // so clear output_string once its contents have been processed.

155 // Process(output_string.data(), output_string.size());

156 // output_string.clear();

157 // }

158 // if (!v.FinishEncoding(&output_string)) {

159 // HandleError();

160 // return;

161 // }

162 // Process(output_string.data(), output_string.size());

163 // output_string.clear();

164 //

165 // I.e., the allowed pattern of calls is

166 // StartEncoding EncodeChunk* FinishEncoding

167 //

168 // The size of the encoded output depends on the sizes of the chunks

169 // passed in (i.e. the chunking boundary affects compression).

170 // However the decoded output is independent of chunk boundaries.

171

172 // Sets up the data structures for encoding.

173 // Writes a VCDIFF delta file header (as defined in RFC section 4.1)

174 // to *output_string.

175 //

176 // Note: we append, so the old contents of *output_string stick around.

177 // This convention differs from the non-streaming Encode/Decode

178 // interfaces in VCDiffEncoder.

179 //

180 // If an error occurs, this function returns false; otherwise it returns true.

181 // If this function returns false, the caller does not need to call

182 // FinishEncoding or to do any cleanup except destroying the

183 // VCDiffStreamingEncoder object.

184 template<class OutputType>

185 bool StartEncoding(OutputType* output) {

186 OutputString<OutputType> output_string(output);

187 return StartEncodingToInterface(&output_string);

188 }

189

190 bool StartEncodingToInterface(OutputStringInterface* output_string);

191

192 // Appends compressed encoding for "data" (one complete VCDIFF delta window)

193 // to *output_string.

194 // If an error occurs (for example, if StartEncoding was not called

195 // earlier or StartEncoding returned false), this function returns false;

196 // otherwise it returns true. The caller does not need to call FinishEncoding

197 // or do any cleanup except destroying the VCDiffStreamingEncoder

198 // if this function returns false.

199 template<class OutputType>

200 bool EncodeChunk(const char* data, size_t len, OutputType* output) {

201 OutputString<OutputType> output_string(output);

202 return EncodeChunkToInterface(data, len, &output_string);

203 }

204

205 bool EncodeChunkToInterface(const char* data, size_t len,

206 OutputStringInterface* output_string);

207

208 // Finishes encoding and appends any leftover encoded data to *output_string.

209 // If an error occurs (for example, if StartEncoding was not called

210 // earlier or StartEncoding returned false), this function returns false;

211 // otherwise it returns true. The caller does not need to

212 // do any cleanup except destroying the VCDiffStreamingEncoder

213 // if this function returns false.

214 template<class OutputType>

215 bool FinishEncoding(OutputType* output) {

216 OutputString<OutputType> output_string(output);

217 return FinishEncodingToInterface(&output_string);

218 }

219

220 bool FinishEncodingToInterface(OutputStringInterface* output_string);

221

222 // Replaces the contents of match_counts with a vector of integers,

223 // one for each possible match length. The value of match_counts[n]

224 // is equal to the number of matches of length n found so far

225 // for this VCDiffStreamingEncoder object.

226 void GetMatchCounts(std::vector<int>* match_counts) const;

227

228 private:

229 VCDiffStreamingEncoderImpl* const impl_;

230

231 // Make the copy constructor and assignment operator private

232 // so that they don't inadvertently get used.

233 VCDiffStreamingEncoder(const VCDiffStreamingEncoder&); // NOLINT

234 void operator=(const VCDiffStreamingEncoder&);

235 };

236

237 // A simpler (non-streaming) interface to the VCDIFF encoder that can be used

238 // if the entire target data string is available.

239 //

240 class VCDiffEncoder {

241 public:

242 VCDiffEncoder(const char* dictionary_contents, size_t dictionary_size)

243 : dictionary_(dictionary_contents, dictionary_size),

244 encoder_(NULL),

245 flags_(VCD_STANDARD_FORMAT) { }

246

247 ~VCDiffEncoder() {

248 delete encoder_;

249 }

250

251 // By default, VCDiffEncoder uses standard VCDIFF format. This function

252 // can be used before calling Encode(), to specify that interleaved format

253 // and/or checksum format should be used.

254 void SetFormatFlags(VCDiffFormatExtensionFlags flags) { flags_ = flags; }

255

256 // Replaces old contents of output_string with the encoded form of

257 // target_data.

258 template<class OutputType>

259 bool Encode(const char* target_data,

260 size_t target_len,

261 OutputType* output) {

262 OutputString<OutputType> output_string(output);

263 return EncodeToInterface(target_data, target_len, &output_string);

264 }

265

266 private:

267 // Always look for matches in both source and target. This default value

268 // can be changed in this code if desired.

269 static const bool look_for_target_matches_ = true;

270

271 bool EncodeToInterface(const char* target_data,

272 size_t target_len,

273 OutputStringInterface* output_string);

274

275 HashedDictionary dictionary_;

276 VCDiffStreamingEncoder* encoder_;

277 VCDiffFormatExtensionFlags flags_;

278

279 // Make the copy constructor and assignment operator private

280 // so that they don't inadvertently get used.

281 VCDiffEncoder(const VCDiffEncoder&); // NOLINT

282 void operator=(const VCDiffEncoder&);

283 };

284

285 } // namespace open_vcdiff

286

287 #endif // OPEN_VCDIFF_VCENCODER_H_

OLD	NEW