OLD | NEW |
| (Empty) |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Streams classes. | |
6 // | |
7 // These memory-resident streams are used for serializing data into a sequential | |
8 // region of memory. | |
9 // | |
10 // Streams are divided into SourceStreams for reading and SinkStreams for | |
11 // writing. Streams are aggregated into Sets which allows several streams to be | |
12 // used at once. Example: we can write A1, B1, A2, B2 but achieve the memory | |
13 // layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another. | |
14 // | |
15 // The aggregated streams are important to Courgette's compression efficiency, | |
16 // we use it to cluster similar kinds of data which helps to generate longer | |
17 // common subsequences and repeated sequences. | |
18 | |
19 #include "third_party/courgette/streams.h" | |
20 | |
21 #include <io.h> | |
22 #include <memory.h> | |
23 | |
24 #include "base/basictypes.h" | |
25 | |
26 namespace courgette { | |
27 | |
28 // Update this version number if the serialization format of a StreamSet | |
29 // changes. | |
30 static const unsigned int kStreamsSerializationFormatVersion = 20090218; | |
31 | |
32 // | |
33 // This is a cut down Varint implementation, implementing only what we use for | |
34 // streams. The original implementation is at google3/util/coding/varint.h | |
35 // | |
36 class Varint { | |
37 public: | |
38 // Maximum lengths of varint encoding of uint32 | |
39 static const int kMax32 = 5; | |
40 | |
41 // Parses a Varint32 encoded value from |source| and stores it in |output|, | |
42 // and returns a pointer to the following byte. Returns NULL if a valid | |
43 // varint value was not found before |limit|. | |
44 static const uint8* Parse32WithLimit(const uint8* source, const uint8* limit, | |
45 uint32* output); | |
46 | |
47 // Writes the Varint32 encoded representation of |value| to buffer | |
48 // |destination|. |destination| must have sufficient length to hold kMax32 | |
49 // bytes. Returns a pointer to the byte just past the last encoded byte. | |
50 static uint8* Encode32(uint8* destination, uint32 value); | |
51 }; | |
52 | |
53 // Parses a Varint32 encoded unsigned number from |source|. The Varint32 | |
54 // encoding is a little-endian sequence of bytes containing base-128 digits, | |
55 // with the high order bit set to indicate if there are more digits. | |
56 // | |
57 // For each byte, we mask out the digit and 'or' it into the right place in the | |
58 // result. | |
59 // | |
60 // The digit loop is unrolled for performance. It usually exits after the first | |
61 // one or two digits. | |
62 const uint8* Varint::Parse32WithLimit(const uint8* source, | |
63 const uint8* limit, | |
64 uint32* output) { | |
65 uint32 digit, result; | |
66 if (source >= limit) | |
67 return NULL; | |
68 digit = *(source++); | |
69 result = digit & 127; | |
70 if (digit < 128) { | |
71 *output = result; | |
72 return source; | |
73 } | |
74 | |
75 if (source >= limit) | |
76 return NULL; | |
77 digit = *(source++); | |
78 result |= (digit & 127) << 7; | |
79 if (digit < 128) { | |
80 *output = result; | |
81 return source; | |
82 } | |
83 | |
84 if (source >= limit) | |
85 return NULL; | |
86 digit = *(source++); | |
87 result |= (digit & 127) << 14; | |
88 if (digit < 128) { | |
89 *output = result; | |
90 return source; | |
91 } | |
92 | |
93 if (source >= limit) | |
94 return NULL; | |
95 digit = *(source++); | |
96 result |= (digit & 127) << 21; | |
97 if (digit < 128) { | |
98 *output = result; | |
99 return source; | |
100 } | |
101 | |
102 if (source >= limit) | |
103 return NULL; | |
104 digit = *(source++); | |
105 result |= (digit & 127) << 28; | |
106 if (digit < 128) { | |
107 *output = result; | |
108 return source; | |
109 } | |
110 | |
111 return NULL; // Value is too long to be a Varint32. | |
112 } | |
113 | |
114 // Write the base-128 digits in little-endian order. All except the last digit | |
115 // have the high bit set to indicate more digits. | |
116 inline uint8* Varint::Encode32(uint8* destination, uint32 value) { | |
117 while (value >= 128) { | |
118 *(destination++) = value | 128; | |
119 value = value >> 7; | |
120 } | |
121 *(destination++) = value; | |
122 return destination; | |
123 } | |
124 | |
125 void SourceStream::Init(const SinkStream& sink) { | |
126 Init(sink.Buffer(), sink.Length()); | |
127 } | |
128 | |
129 bool SourceStream::Read(void* destination, size_t count) { | |
130 if (current_ + count > end_) | |
131 return false; | |
132 memcpy(destination, current_, count); | |
133 current_ += count; | |
134 return true; | |
135 } | |
136 | |
137 bool SourceStream::ReadVarint32(uint32* output_value) { | |
138 const uint8* after = Varint::Parse32WithLimit(current_, end_, output_value); | |
139 if (!after) | |
140 return false; | |
141 current_ = after; | |
142 return true; | |
143 } | |
144 | |
145 bool SourceStream::ReadVarint32Signed(int32* output_value) { | |
146 // Signed numbers are encoded as unsigned numbers so that numbers nearer zero | |
147 // have shorter varint encoding. | |
148 // 0000xxxx encoded as 000xxxx0. | |
149 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx. | |
150 uint32 unsigned_value; | |
151 if (!ReadVarint32(&unsigned_value)) | |
152 return false; | |
153 if (unsigned_value & 1) | |
154 *output_value = ~static_cast<int32>(unsigned_value >> 1); | |
155 else | |
156 *output_value = (unsigned_value >> 1); | |
157 return true; | |
158 } | |
159 | |
160 bool SourceStream::ShareSubstream(size_t offset, size_t length, | |
161 SourceStream* substream) { | |
162 if (offset > Remaining()) | |
163 return false; | |
164 if (length > Remaining() - offset) | |
165 return false; | |
166 substream->Init(current_ + offset, length); | |
167 return true; | |
168 } | |
169 | |
170 bool SourceStream::ReadSubstream(size_t length, SourceStream* substream) { | |
171 if (!ShareSubstream(0, length, substream)) | |
172 return false; | |
173 current_ += length; | |
174 return true; | |
175 } | |
176 | |
177 bool SourceStream::Skip(size_t byte_count) { | |
178 if (current_ + byte_count > end_) | |
179 return false; | |
180 current_ += byte_count; | |
181 return true; | |
182 } | |
183 | |
184 void SinkStream::Write(const void* data, size_t byte_count) { | |
185 buffer_.append(static_cast<const char*>(data), byte_count); | |
186 } | |
187 | |
188 void SinkStream::WriteVarint32(uint32 value) { | |
189 uint8 buffer[Varint::kMax32]; | |
190 uint8* end = Varint::Encode32(buffer, value); | |
191 Write(buffer, end - buffer); | |
192 } | |
193 | |
194 void SinkStream::WriteVarint32Signed(int32 value) { | |
195 // Encode signed numbers so that numbers nearer zero have shorter | |
196 // varint encoding. | |
197 // 0000xxxx encoded as 000xxxx0. | |
198 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx. | |
199 if (value < 0) | |
200 WriteVarint32(~value * 2 + 1); | |
201 else | |
202 WriteVarint32(value * 2); | |
203 } | |
204 | |
205 void SinkStream::Append(SinkStream* other) { | |
206 Write(other->buffer_.c_str(), other->buffer_.size()); | |
207 other->buffer_.clear(); | |
208 other->buffer_.reserve(0); // Non-binding request to reduce storage. | |
209 } | |
210 | |
211 //////////////////////////////////////////////////////////////////////////////// | |
212 | |
213 SourceStreamSet::SourceStreamSet() | |
214 : count_(kMaxStreams) { | |
215 } | |
216 | |
217 SourceStreamSet::~SourceStreamSet() { | |
218 } | |
219 | |
220 | |
221 // Initializes from |source|. | |
222 // The stream set for N streams is serialized as a header | |
223 // <version><N><length1><length2>...<lengthN> | |
224 // followed by the stream contents | |
225 // <bytes1><bytes2>...<bytesN> | |
226 // | |
227 bool SourceStreamSet::Init(const void* source, size_t byte_count) { | |
228 const uint8* start = static_cast<const uint8*>(source); | |
229 const uint8* end = start + byte_count; | |
230 | |
231 unsigned int version; | |
232 const uint8* finger = Varint::Parse32WithLimit(start, end, &version); | |
233 if (finger == NULL) | |
234 return false; | |
235 if (version != kStreamsSerializationFormatVersion) | |
236 return false; | |
237 | |
238 unsigned int count; | |
239 finger = Varint::Parse32WithLimit(finger, end, &count); | |
240 if (finger == NULL) | |
241 return false; | |
242 if (count > kMaxStreams) | |
243 return false; | |
244 | |
245 count_ = count; | |
246 | |
247 unsigned int lengths[kMaxStreams]; | |
248 size_t accumulated_length = 0; | |
249 | |
250 for (size_t i = 0; i < count_; ++i) { | |
251 finger = Varint::Parse32WithLimit(finger, end, &lengths[i]); | |
252 if (finger == NULL) | |
253 return false; | |
254 accumulated_length += lengths[i]; | |
255 } | |
256 | |
257 // Remaining bytes should add up to sum of lengths. | |
258 if (end - finger != accumulated_length) | |
259 return false; | |
260 | |
261 accumulated_length = finger - start; | |
262 for (size_t i = 0; i < count_; ++i) { | |
263 stream(i)->Init(start + accumulated_length, lengths[i]); | |
264 accumulated_length += lengths[i]; | |
265 } | |
266 | |
267 return true; | |
268 } | |
269 | |
270 bool SourceStreamSet::Init(SourceStream* source) { | |
271 // TODO(sra): consume the rest of |source|. | |
272 return Init(source->Buffer(), source->Remaining()); | |
273 } | |
274 | |
275 bool SourceStreamSet::ReadSet(SourceStreamSet* set) { | |
276 uint32 stream_count = 0; | |
277 SourceStream* control_stream = this->stream(0); | |
278 if (!control_stream->ReadVarint32(&stream_count)) | |
279 return false; | |
280 | |
281 uint32 lengths[kMaxStreams] = {}; // i.e. all zero. | |
282 | |
283 for (size_t i = 0; i < stream_count; ++i) { | |
284 if (!control_stream->ReadVarint32(&lengths[i])) | |
285 return false; | |
286 } | |
287 | |
288 for (size_t i = 0; i < stream_count; ++i) { | |
289 if (!this->stream(i)->ReadSubstream(lengths[i], set->stream(i))) | |
290 return false; | |
291 } | |
292 return true; | |
293 } | |
294 | |
295 bool SourceStreamSet::Empty() const { | |
296 for (size_t i = 0; i < count_; ++i) { | |
297 if (streams_[i].Remaining() != 0) | |
298 return false; | |
299 } | |
300 return true; | |
301 } | |
302 | |
303 //////////////////////////////////////////////////////////////////////////////// | |
304 | |
305 SinkStreamSet::SinkStreamSet() | |
306 : count_(kMaxStreams) { | |
307 } | |
308 | |
309 SinkStreamSet::~SinkStreamSet() { | |
310 } | |
311 | |
312 void SinkStreamSet::Init(size_t stream_index_limit) { | |
313 count_ = stream_index_limit; | |
314 } | |
315 | |
316 // The header for a stream set for N streams is serialized as | |
317 // <version><N><length1><length2>...<lengthN> | |
318 void SinkStreamSet::CopyHeaderTo(SinkStream* header) { | |
319 header->WriteVarint32(kStreamsSerializationFormatVersion); | |
320 header->WriteVarint32(count_); | |
321 for (size_t i = 0; i < count_; ++i) { | |
322 header->WriteVarint32(stream(i)->Length()); | |
323 } | |
324 } | |
325 | |
326 // Writes |this| to |combined_stream|. See SourceStreamSet::Init for the layout | |
327 // of the stream metadata and contents. | |
328 bool SinkStreamSet::CopyTo(SinkStream *combined_stream) { | |
329 SinkStream header; | |
330 CopyHeaderTo(&header); | |
331 combined_stream->Append(&header); | |
332 for (size_t i = 0; i < count_; ++i) { | |
333 combined_stream->Append(stream(i)); | |
334 } | |
335 return true; | |
336 } | |
337 | |
338 namespace { | |
339 bool Write(int file_descriptor, SinkStream* sink) { | |
340 size_t length = sink->Length(); | |
341 const void *buffer = sink->Buffer(); | |
342 int bytes_written = _write(file_descriptor, buffer, length); | |
343 return bytes_written == length; | |
344 } | |
345 } | |
346 | |
347 bool SinkStreamSet::CopyToFileDescriptor(int file_descriptor) { | |
348 SinkStream header; | |
349 CopyHeaderTo(&header); | |
350 if (!Write(file_descriptor, &header)) | |
351 return false; | |
352 for (size_t i = 0; i < count_; ++i) { | |
353 if (!Write(file_descriptor, stream(i))) | |
354 return false; | |
355 } | |
356 return true; | |
357 } | |
358 | |
359 bool SinkStreamSet::WriteSet(SinkStreamSet* set) { | |
360 uint32 lengths[kMaxStreams]; | |
361 // 'stream_count' includes all non-empty streams and all empty stream numbered | |
362 // lower than a non-empty stream. | |
363 size_t stream_count = 0; | |
364 for (size_t i = 0; i < kMaxStreams; ++i) { | |
365 SinkStream* stream = set->stream(i); | |
366 lengths[i] = stream->Length(); | |
367 if (lengths[i] > 0) | |
368 stream_count = i + 1; | |
369 } | |
370 | |
371 SinkStream* control_stream = this->stream(0); | |
372 control_stream->WriteVarint32(stream_count); | |
373 for (size_t i = 0; i < stream_count; ++i) { | |
374 control_stream->WriteVarint32(lengths[i]); | |
375 } | |
376 | |
377 for (size_t i = 0; i < stream_count; ++i) { | |
378 this->stream(i)->Append(set->stream(i)); | |
379 } | |
380 return true; | |
381 } | |
382 | |
383 } // namespace | |
OLD | NEW |