Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(328)

Side by Side Diff: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/google/vcencoder.h

Issue 5203: Transition to pulling open-vcdiff from repository, instead of using snapshot... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 12 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2007 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15
16 #ifndef OPEN_VCDIFF_VCENCODER_H_
17 #define OPEN_VCDIFF_VCENCODER_H_
18
19 #include <cstddef> // size_t
20 #include <vector>
21 #include "google/output_string.h"
22
23 namespace open_vcdiff {
24
25 class VCDiffEngine;
26 class VCDiffStreamingEncoderImpl;
27
// These flags are passed to the constructor of VCDiffStreamingEncoder
// to determine whether certain open-vcdiff format extensions
// (which are not part of the RFC 3284 draft standard for VCDIFF)
// are employed.
//
// Because these extensions are not part of the VCDIFF standard, if
// any of these flags except VCD_STANDARD_FORMAT is specified, then the caller
// must be certain that the receiver of the data will be using open-vcdiff
// to decode the delta file, or at least that the receiver can interpret
// these extensions.  The encoder will use an 'S' as the fourth character
// in the delta file to indicate that non-standard extensions are being used.
//
// The non-zero values occupy distinct bits, so they can be combined with
// bitwise OR and stored in a VCDiffFormatExtensionFlags.
//
enum VCDiffFormatExtensionFlagValues {
  // No extensions: the encoded format will conform to the RFC
  // draft standard for VCDIFF.
  VCD_STANDARD_FORMAT = 0x00,
  // If this flag is specified, then the encoder writes each delta file
  // window by interleaving instructions and sizes with their corresponding
  // addresses and data, rather than placing these elements
  // into three separate sections.  This facilitates providing partially
  // decoded results when only a portion of a delta file window is received
  // (e.g. when HTTP over TCP is used as the transmission protocol.)
  VCD_FORMAT_INTERLEAVED = 0x01,
  // If this flag is specified, then an Adler32 checksum
  // of the target window data is included in the delta window.
  VCD_FORMAT_CHECKSUM = 0x02
};

// Holds zero or more VCDiffFormatExtensionFlagValues combined with bitwise OR.
// A plain int is used because the result of OR-ing two enumerators is not
// itself a value of the enum type.
typedef int VCDiffFormatExtensionFlags;
57
58 // A HashedDictionary must be constructed from the dictionary data
59 // in order to use VCDiffStreamingEncoder. If the same dictionary will
60 // be used to perform several encoding operations, then the caller should
61 // create the HashedDictionary once and cache it for reuse. This object
62 // is thread-safe: the same const HashedDictionary can be used
63 // by several threads simultaneously, each with its own VCDiffStreamingEncoder.
64 //
65 // dictionary_contents is copied into the HashedDictionary, so the
66 // caller may free that string, if desired, after the constructor returns.
67 //
68 class HashedDictionary {
69 public:
70 HashedDictionary(const char* dictionary_contents,
71 size_t dictionary_size);
72 ~HashedDictionary();
73
74 // Init() must be called before using the HashedDictionary as an argument
75 // to the VCDiffStreamingEncoder, or for any other purpose except
76 // destruction. It returns true if initialization succeeded, or false
77 // if an error occurred, in which case the caller should destroy the object
78 // without using it.
79 bool Init();
80
81 const VCDiffEngine* engine() const { return engine_; }
82
83 private:
84 const VCDiffEngine* engine_;
85 };
86
87 // The standard streaming interface to the VCDIFF (RFC 3284) encoder.
88 // "Streaming" in this context means that, even though the entire set of
89 // input data to be encoded may not be available at once, the encoder
90 // can produce partial output based on what is available. Of course,
91 // the caller should try to maximize the sizes of the data chunks passed
92 // to the encoder.
93 class VCDiffStreamingEncoder {
94 public:
95 // The HashedDictionary object passed to the constructor must remain valid,
96 // without being deleted, for the lifetime of the VCDiffStreamingEncoder
97 // object.
98 //
99 // format_extensions allows certain open-vcdiff extensions to the VCDIFF
100 // format to be included in the encoded output. These extensions are not
101 // part of the RFC 3284 draft standard, so specifying any extension flags
102 // will make the output compatible only with open-vcdiff, or with other
103 // VCDIFF implementations that accept these extensions. See above for an
104 // explanation of each possible flag value.
105 //
106 // *** look_for_target_matches:
107 // The VCDIFF format allows COPY instruction addresses to reference data from
108 // the source (dictionary), or from previously encoded target data.
109 //
110 // If look_for_target_matches is false, then the encoder will only
111 // produce COPY instructions that reference source data from the dictionary,
112 // never from previously encoded target data. This will speed up the encoding
113 // process, but the encoded data will not be as compact.
114 //
115 // If this value is true, then the encoder will produce COPY instructions
116 // that reference either source data or target data. A COPY instruction from
117 // the previously encoded target data may even extend into the range of the
118 // data being produced by that same COPY instruction; for example, if the
119 // previously encoded target data is "LA", then a single COPY instruction of
120 // length 10 can produce the additional target data "LALALALALA".
121 //
122 // There is a third type of COPY instruction that starts within
123 // the source data and extends from the end of the source data
124 // into the beginning of the target data. This VCDIFF encoder will never
125 // produce a COPY instruction of this third type (regardless of the value of
126 // look_for_target_matches) because the cost of checking for matches
127 // across the source-target boundary would not justify its benefits.
128 //
129 VCDiffStreamingEncoder(const HashedDictionary* dictionary,
130 VCDiffFormatExtensionFlags format_extensions,
131 bool look_for_target_matches);
132 ~VCDiffStreamingEncoder();
133
134 // The client should use these routines as follows:
135 // HashedDictionary hd(dictionary, dictionary_size);
136 // if (!hd.Init()) {
137 // HandleError();
138 // return;
139 // }
140 // string output_string;
141 // VCDiffStreamingEncoder v(hd, false, false);
142 // if (!v.StartEncoding(&output_string)) {
143 // HandleError();
144 // return; // No need to call FinishEncoding()
145 // }
146 // Process(output_string.data(), output_string.size());
147 // output_string.clear();
148 // while (get data_buf) {
149 // if (!v.EncodeChunk(data_buf, data_len, &output_string)) {
150 // HandleError();
151 // return; // No need to call FinishEncoding()
152 // }
153 // // The encoding is appended to output_string at each call,
154 // // so clear output_string once its contents have been processed.
155 // Process(output_string.data(), output_string.size());
156 // output_string.clear();
157 // }
158 // if (!v.FinishEncoding(&output_string)) {
159 // HandleError();
160 // return;
161 // }
162 // Process(output_string.data(), output_string.size());
163 // output_string.clear();
164 //
165 // I.e., the allowed pattern of calls is
166 // StartEncoding EncodeChunk* FinishEncoding
167 //
168 // The size of the encoded output depends on the sizes of the chunks
169 // passed in (i.e. the chunking boundary affects compression).
170 // However the decoded output is independent of chunk boundaries.
171
172 // Sets up the data structures for encoding.
173 // Writes a VCDIFF delta file header (as defined in RFC section 4.1)
174 // to *output_string.
175 //
176 // Note: we *append*, so the old contents of *output_string stick around.
177 // This convention differs from the non-streaming Encode/Decode
178 // interfaces in VCDiffEncoder.
179 //
180 // If an error occurs, this function returns false; otherwise it returns true.
181 // If this function returns false, the caller does not need to call
182 // FinishEncoding or to do any cleanup except destroying the
183 // VCDiffStreamingEncoder object.
184 template<class OutputType>
185 bool StartEncoding(OutputType* output) {
186 OutputString<OutputType> output_string(output);
187 return StartEncodingToInterface(&output_string);
188 }
189
190 bool StartEncodingToInterface(OutputStringInterface* output_string);
191
192 // Appends compressed encoding for "data" (one complete VCDIFF delta window)
193 // to *output_string.
194 // If an error occurs (for example, if StartEncoding was not called
195 // earlier or StartEncoding returned false), this function returns false;
196 // otherwise it returns true. The caller does not need to call FinishEncoding
197 // or do any cleanup except destroying the VCDiffStreamingEncoder
198 // if this function returns false.
199 template<class OutputType>
200 bool EncodeChunk(const char* data, size_t len, OutputType* output) {
201 OutputString<OutputType> output_string(output);
202 return EncodeChunkToInterface(data, len, &output_string);
203 }
204
205 bool EncodeChunkToInterface(const char* data, size_t len,
206 OutputStringInterface* output_string);
207
208 // Finishes encoding and appends any leftover encoded data to *output_string.
209 // If an error occurs (for example, if StartEncoding was not called
210 // earlier or StartEncoding returned false), this function returns false;
211 // otherwise it returns true. The caller does not need to
212 // do any cleanup except destroying the VCDiffStreamingEncoder
213 // if this function returns false.
214 template<class OutputType>
215 bool FinishEncoding(OutputType* output) {
216 OutputString<OutputType> output_string(output);
217 return FinishEncodingToInterface(&output_string);
218 }
219
220 bool FinishEncodingToInterface(OutputStringInterface* output_string);
221
222 // Replaces the contents of match_counts with a vector of integers,
223 // one for each possible match length. The value of match_counts[n]
224 // is equal to the number of matches of length n found so far
225 // for this VCDiffStreamingEncoder object.
226 void GetMatchCounts(std::vector<int>* match_counts) const;
227
228 private:
229 VCDiffStreamingEncoderImpl* const impl_;
230
231 // Make the copy constructor and assignment operator private
232 // so that they don't inadvertently get used.
233 VCDiffStreamingEncoder(const VCDiffStreamingEncoder&); // NOLINT
234 void operator=(const VCDiffStreamingEncoder&);
235 };
236
237 // A simpler (non-streaming) interface to the VCDIFF encoder that can be used
238 // if the entire target data string is available.
239 //
240 class VCDiffEncoder {
241 public:
242 VCDiffEncoder(const char* dictionary_contents, size_t dictionary_size)
243 : dictionary_(dictionary_contents, dictionary_size),
244 encoder_(NULL),
245 flags_(VCD_STANDARD_FORMAT) { }
246
247 ~VCDiffEncoder() {
248 delete encoder_;
249 }
250
251 // By default, VCDiffEncoder uses standard VCDIFF format. This function
252 // can be used before calling Encode(), to specify that interleaved format
253 // and/or checksum format should be used.
254 void SetFormatFlags(VCDiffFormatExtensionFlags flags) { flags_ = flags; }
255
256 // Replaces old contents of output_string with the encoded form of
257 // target_data.
258 template<class OutputType>
259 bool Encode(const char* target_data,
260 size_t target_len,
261 OutputType* output) {
262 OutputString<OutputType> output_string(output);
263 return EncodeToInterface(target_data, target_len, &output_string);
264 }
265
266 private:
267 // Always look for matches in both source and target. This default value
268 // can be changed in this code if desired.
269 static const bool look_for_target_matches_ = true;
270
271 bool EncodeToInterface(const char* target_data,
272 size_t target_len,
273 OutputStringInterface* output_string);
274
275 HashedDictionary dictionary_;
276 VCDiffStreamingEncoder* encoder_;
277 VCDiffFormatExtensionFlags flags_;
278
279 // Make the copy constructor and assignment operator private
280 // so that they don't inadvertently get used.
281 VCDiffEncoder(const VCDiffEncoder&); // NOLINT
282 void operator=(const VCDiffEncoder&);
283 };
284
285 } // namespace open_vcdiff
286
287 #endif // OPEN_VCDIFF_VCENCODER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698