Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(228)

Side by Side Diff: third_party/courgette/streams.cc

Issue 115062: Move Courgette... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/courgette/streams.h ('k') | third_party/courgette/streams_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Streams classes.
6 //
7 // These memory-resident streams are used for serializing data into a sequential
8 // region of memory.
9 //
10 // Streams are divided into SourceStreams for reading and SinkStreams for
11 // writing. Streams are aggregated into Sets which allows several streams to be
12 // used at once. Example: we can write A1, B1, A2, B2 but achieve the memory
13 // layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another.
14 //
15 // The aggregated streams are important to Courgette's compression efficiency,
16 // we use it to cluster similar kinds of data which helps to generate longer
17 // common subsequences and repeated sequences.
18
19 #include "third_party/courgette/streams.h"
20
21 #include <io.h>
22 #include <memory.h>
23
24 #include "base/basictypes.h"
25
26 namespace courgette {
27
28 // Update this version number if the serialization format of a StreamSet
29 // changes.
30 static const unsigned int kStreamsSerializationFormatVersion = 20090218;
31
32 //
33 // This is a cut down Varint implementation, implementing only what we use for
34 // streams. The original implementation is at google3/util/coding/varint.h
35 //
36 class Varint {
37 public:
38 // Maximum lengths of varint encoding of uint32
39 static const int kMax32 = 5;
40
41 // Parses a Varint32 encoded value from |source| and stores it in |output|,
42 // and returns a pointer to the following byte. Returns NULL if a valid
43 // varint value was not found before |limit|.
44 static const uint8* Parse32WithLimit(const uint8* source, const uint8* limit,
45 uint32* output);
46
47 // Writes the Varint32 encoded representation of |value| to buffer
48 // |destination|. |destination| must have sufficient length to hold kMax32
49 // bytes. Returns a pointer to the byte just past the last encoded byte.
50 static uint8* Encode32(uint8* destination, uint32 value);
51 };
52
53 // Parses a Varint32 encoded unsigned number from |source|. The Varint32
54 // encoding is a little-endian sequence of bytes containing base-128 digits,
55 // with the high order bit set to indicate if there are more digits.
56 //
57 // For each byte, we mask out the digit and 'or' it into the right place in the
58 // result.
59 //
60 // The digit loop is unrolled for performance. It usually exits after the first
61 // one or two digits.
62 const uint8* Varint::Parse32WithLimit(const uint8* source,
63 const uint8* limit,
64 uint32* output) {
65 uint32 digit, result;
66 if (source >= limit)
67 return NULL;
68 digit = *(source++);
69 result = digit & 127;
70 if (digit < 128) {
71 *output = result;
72 return source;
73 }
74
75 if (source >= limit)
76 return NULL;
77 digit = *(source++);
78 result |= (digit & 127) << 7;
79 if (digit < 128) {
80 *output = result;
81 return source;
82 }
83
84 if (source >= limit)
85 return NULL;
86 digit = *(source++);
87 result |= (digit & 127) << 14;
88 if (digit < 128) {
89 *output = result;
90 return source;
91 }
92
93 if (source >= limit)
94 return NULL;
95 digit = *(source++);
96 result |= (digit & 127) << 21;
97 if (digit < 128) {
98 *output = result;
99 return source;
100 }
101
102 if (source >= limit)
103 return NULL;
104 digit = *(source++);
105 result |= (digit & 127) << 28;
106 if (digit < 128) {
107 *output = result;
108 return source;
109 }
110
111 return NULL; // Value is too long to be a Varint32.
112 }
113
114 // Write the base-128 digits in little-endian order. All except the last digit
115 // have the high bit set to indicate more digits.
116 inline uint8* Varint::Encode32(uint8* destination, uint32 value) {
117 while (value >= 128) {
118 *(destination++) = value | 128;
119 value = value >> 7;
120 }
121 *(destination++) = value;
122 return destination;
123 }
124
125 void SourceStream::Init(const SinkStream& sink) {
126 Init(sink.Buffer(), sink.Length());
127 }
128
129 bool SourceStream::Read(void* destination, size_t count) {
130 if (current_ + count > end_)
131 return false;
132 memcpy(destination, current_, count);
133 current_ += count;
134 return true;
135 }
136
137 bool SourceStream::ReadVarint32(uint32* output_value) {
138 const uint8* after = Varint::Parse32WithLimit(current_, end_, output_value);
139 if (!after)
140 return false;
141 current_ = after;
142 return true;
143 }
144
145 bool SourceStream::ReadVarint32Signed(int32* output_value) {
146 // Signed numbers are encoded as unsigned numbers so that numbers nearer zero
147 // have shorter varint encoding.
148 // 0000xxxx encoded as 000xxxx0.
149 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx.
150 uint32 unsigned_value;
151 if (!ReadVarint32(&unsigned_value))
152 return false;
153 if (unsigned_value & 1)
154 *output_value = ~static_cast<int32>(unsigned_value >> 1);
155 else
156 *output_value = (unsigned_value >> 1);
157 return true;
158 }
159
160 bool SourceStream::ShareSubstream(size_t offset, size_t length,
161 SourceStream* substream) {
162 if (offset > Remaining())
163 return false;
164 if (length > Remaining() - offset)
165 return false;
166 substream->Init(current_ + offset, length);
167 return true;
168 }
169
170 bool SourceStream::ReadSubstream(size_t length, SourceStream* substream) {
171 if (!ShareSubstream(0, length, substream))
172 return false;
173 current_ += length;
174 return true;
175 }
176
177 bool SourceStream::Skip(size_t byte_count) {
178 if (current_ + byte_count > end_)
179 return false;
180 current_ += byte_count;
181 return true;
182 }
183
184 void SinkStream::Write(const void* data, size_t byte_count) {
185 buffer_.append(static_cast<const char*>(data), byte_count);
186 }
187
188 void SinkStream::WriteVarint32(uint32 value) {
189 uint8 buffer[Varint::kMax32];
190 uint8* end = Varint::Encode32(buffer, value);
191 Write(buffer, end - buffer);
192 }
193
194 void SinkStream::WriteVarint32Signed(int32 value) {
195 // Encode signed numbers so that numbers nearer zero have shorter
196 // varint encoding.
197 // 0000xxxx encoded as 000xxxx0.
198 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx.
199 if (value < 0)
200 WriteVarint32(~value * 2 + 1);
201 else
202 WriteVarint32(value * 2);
203 }
204
205 void SinkStream::Append(SinkStream* other) {
206 Write(other->buffer_.c_str(), other->buffer_.size());
207 other->buffer_.clear();
208 other->buffer_.reserve(0); // Non-binding request to reduce storage.
209 }
210
211 ////////////////////////////////////////////////////////////////////////////////
212
213 SourceStreamSet::SourceStreamSet()
214 : count_(kMaxStreams) {
215 }
216
217 SourceStreamSet::~SourceStreamSet() {
218 }
219
220
221 // Initializes from |source|.
222 // The stream set for N streams is serialized as a header
223 // <version><N><length1><length2>...<lengthN>
224 // followed by the stream contents
225 // <bytes1><bytes2>...<bytesN>
226 //
227 bool SourceStreamSet::Init(const void* source, size_t byte_count) {
228 const uint8* start = static_cast<const uint8*>(source);
229 const uint8* end = start + byte_count;
230
231 unsigned int version;
232 const uint8* finger = Varint::Parse32WithLimit(start, end, &version);
233 if (finger == NULL)
234 return false;
235 if (version != kStreamsSerializationFormatVersion)
236 return false;
237
238 unsigned int count;
239 finger = Varint::Parse32WithLimit(finger, end, &count);
240 if (finger == NULL)
241 return false;
242 if (count > kMaxStreams)
243 return false;
244
245 count_ = count;
246
247 unsigned int lengths[kMaxStreams];
248 size_t accumulated_length = 0;
249
250 for (size_t i = 0; i < count_; ++i) {
251 finger = Varint::Parse32WithLimit(finger, end, &lengths[i]);
252 if (finger == NULL)
253 return false;
254 accumulated_length += lengths[i];
255 }
256
257 // Remaining bytes should add up to sum of lengths.
258 if (end - finger != accumulated_length)
259 return false;
260
261 accumulated_length = finger - start;
262 for (size_t i = 0; i < count_; ++i) {
263 stream(i)->Init(start + accumulated_length, lengths[i]);
264 accumulated_length += lengths[i];
265 }
266
267 return true;
268 }
269
270 bool SourceStreamSet::Init(SourceStream* source) {
271 // TODO(sra): consume the rest of |source|.
272 return Init(source->Buffer(), source->Remaining());
273 }
274
275 bool SourceStreamSet::ReadSet(SourceStreamSet* set) {
276 uint32 stream_count = 0;
277 SourceStream* control_stream = this->stream(0);
278 if (!control_stream->ReadVarint32(&stream_count))
279 return false;
280
281 uint32 lengths[kMaxStreams] = {}; // i.e. all zero.
282
283 for (size_t i = 0; i < stream_count; ++i) {
284 if (!control_stream->ReadVarint32(&lengths[i]))
285 return false;
286 }
287
288 for (size_t i = 0; i < stream_count; ++i) {
289 if (!this->stream(i)->ReadSubstream(lengths[i], set->stream(i)))
290 return false;
291 }
292 return true;
293 }
294
295 bool SourceStreamSet::Empty() const {
296 for (size_t i = 0; i < count_; ++i) {
297 if (streams_[i].Remaining() != 0)
298 return false;
299 }
300 return true;
301 }
302
303 ////////////////////////////////////////////////////////////////////////////////
304
305 SinkStreamSet::SinkStreamSet()
306 : count_(kMaxStreams) {
307 }
308
309 SinkStreamSet::~SinkStreamSet() {
310 }
311
312 void SinkStreamSet::Init(size_t stream_index_limit) {
313 count_ = stream_index_limit;
314 }
315
316 // The header for a stream set for N streams is serialized as
317 // <version><N><length1><length2>...<lengthN>
318 void SinkStreamSet::CopyHeaderTo(SinkStream* header) {
319 header->WriteVarint32(kStreamsSerializationFormatVersion);
320 header->WriteVarint32(count_);
321 for (size_t i = 0; i < count_; ++i) {
322 header->WriteVarint32(stream(i)->Length());
323 }
324 }
325
326 // Writes |this| to |combined_stream|. See SourceStreamSet::Init for the layout
327 // of the stream metadata and contents.
328 bool SinkStreamSet::CopyTo(SinkStream *combined_stream) {
329 SinkStream header;
330 CopyHeaderTo(&header);
331 combined_stream->Append(&header);
332 for (size_t i = 0; i < count_; ++i) {
333 combined_stream->Append(stream(i));
334 }
335 return true;
336 }
337
338 namespace {
339 bool Write(int file_descriptor, SinkStream* sink) {
340 size_t length = sink->Length();
341 const void *buffer = sink->Buffer();
342 int bytes_written = _write(file_descriptor, buffer, length);
343 return bytes_written == length;
344 }
345 }
346
347 bool SinkStreamSet::CopyToFileDescriptor(int file_descriptor) {
348 SinkStream header;
349 CopyHeaderTo(&header);
350 if (!Write(file_descriptor, &header))
351 return false;
352 for (size_t i = 0; i < count_; ++i) {
353 if (!Write(file_descriptor, stream(i)))
354 return false;
355 }
356 return true;
357 }
358
359 bool SinkStreamSet::WriteSet(SinkStreamSet* set) {
360 uint32 lengths[kMaxStreams];
361 // 'stream_count' includes all non-empty streams and all empty stream numbered
362 // lower than a non-empty stream.
363 size_t stream_count = 0;
364 for (size_t i = 0; i < kMaxStreams; ++i) {
365 SinkStream* stream = set->stream(i);
366 lengths[i] = stream->Length();
367 if (lengths[i] > 0)
368 stream_count = i + 1;
369 }
370
371 SinkStream* control_stream = this->stream(0);
372 control_stream->WriteVarint32(stream_count);
373 for (size_t i = 0; i < stream_count; ++i) {
374 control_stream->WriteVarint32(lengths[i]);
375 }
376
377 for (size_t i = 0; i < stream_count; ++i) {
378 this->stream(i)->Append(set->stream(i));
379 }
380 return true;
381 }
382
383 } // namespace
OLDNEW
« no previous file with comments | « third_party/courgette/streams.h ('k') | third_party/courgette/streams_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698