OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Streams classes. | 5 // Streams classes. |
6 // | 6 // |
7 // These memory-resident streams are used for serializing data into a sequential | 7 // These memory-resident streams are used for serializing data into a sequential |
8 // region of memory. | 8 // region of memory. |
9 // | 9 // |
10 // Streams are divided into SourceStreams for reading and SinkStreams for | 10 // Streams are divided into SourceStreams for reading and SinkStreams for |
11 // writing. Streams are aggregated into Sets which allows several streams to be | 11 // writing. Streams are aggregated into Sets which allows several streams to be |
12 // used at once. Example: we can write A1, B1, A2, B2 but achieve the memory | 12 // used at once. Example: we can write A1, B1, A2, B2 but achieve the memory |
13 // layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another. | 13 // layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another. |
14 // | 14 // |
15 // The aggregated streams are important to Courgette's compression efficiency, | 15 // The aggregated streams are important to Courgette's compression efficiency, |
16 // we use it to cluster similar kinds of data which helps to generate longer | 16 // we use it to cluster similar kinds of data which helps to generate longer |
17 // common subsequences and repeated sequences. | 17 // common subsequences and repeated sequences. |
18 | 18 |
19 #include "courgette/streams.h" | 19 #include "courgette/streams.h" |
20 | 20 |
21 #include <memory.h> | 21 #include <memory.h> |
| 22 #include <stddef.h> |
| 23 #include <stdint.h> |
22 | 24 |
23 #include "base/basictypes.h" | |
24 #include "base/logging.h" | 25 #include "base/logging.h" |
25 | 26 |
26 namespace courgette { | 27 namespace courgette { |
27 | 28 |
28 // Update this version number if the serialization format of a StreamSet | 29 // Update this version number if the serialization format of a StreamSet |
29 // changes. | 30 // changes. |
30 static const unsigned int kStreamsSerializationFormatVersion = 20090218; | 31 static const unsigned int kStreamsSerializationFormatVersion = 20090218; |
31 | 32 |
32 // | 33 // |
33 // This is a cut down Varint implementation, implementing only what we use for | 34 // This is a cut down Varint implementation, implementing only what we use for |
34 // streams. | 35 // streams. |
35 // | 36 // |
36 class Varint { | 37 class Varint { |
37 public: | 38 public: |
38 // Maximum lengths of varint encoding of uint32 | 39 // Maximum lengths of varint encoding of uint32_t |
39 static const int kMax32 = 5; | 40 static const int kMax32 = 5; |
40 | 41 |
41 // Parses a Varint32 encoded value from |source| and stores it in |output|, | 42 // Parses a Varint32 encoded value from |source| and stores it in |output|, |
42 // and returns a pointer to the following byte. Returns NULL if a valid | 43 // and returns a pointer to the following byte. Returns NULL if a valid |
43 // varint value was not found before |limit|. | 44 // varint value was not found before |limit|. |
44 static const uint8* Parse32WithLimit(const uint8* source, const uint8* limit, | 45 static const uint8_t* Parse32WithLimit(const uint8_t* source, |
45 uint32* output); | 46 const uint8_t* limit, |
| 47 uint32_t* output); |
46 | 48 |
47 // Writes the Varint32 encoded representation of |value| to buffer | 49 // Writes the Varint32 encoded representation of |value| to buffer |
48 // |destination|. |destination| must have sufficient length to hold kMax32 | 50 // |destination|. |destination| must have sufficient length to hold kMax32 |
49 // bytes. Returns a pointer to the byte just past the last encoded byte. | 51 // bytes. Returns a pointer to the byte just past the last encoded byte. |
50 static uint8* Encode32(uint8* destination, uint32 value); | 52 static uint8_t* Encode32(uint8_t* destination, uint32_t value); |
51 }; | 53 }; |
52 | 54 |
53 // Parses a Varint32 encoded unsigned number from |source|. The Varint32 | 55 // Parses a Varint32 encoded unsigned number from |source|. The Varint32 |
54 // encoding is a little-endian sequence of bytes containing base-128 digits, | 56 // encoding is a little-endian sequence of bytes containing base-128 digits, |
55 // with the high order bit set to indicate if there are more digits. | 57 // with the high order bit set to indicate if there are more digits. |
56 // | 58 // |
57 // For each byte, we mask out the digit and 'or' it into the right place in the | 59 // For each byte, we mask out the digit and 'or' it into the right place in the |
58 // result. | 60 // result. |
59 // | 61 // |
60 // The digit loop is unrolled for performance. It usually exits after the first | 62 // The digit loop is unrolled for performance. It usually exits after the first |
61 // one or two digits. | 63 // one or two digits. |
62 const uint8* Varint::Parse32WithLimit(const uint8* source, | 64 const uint8_t* Varint::Parse32WithLimit(const uint8_t* source, |
63 const uint8* limit, | 65 const uint8_t* limit, |
64 uint32* output) { | 66 uint32_t* output) { |
65 uint32 digit, result; | 67 uint32_t digit, result; |
66 if (source >= limit) | 68 if (source >= limit) |
67 return NULL; | 69 return NULL; |
68 digit = *(source++); | 70 digit = *(source++); |
69 result = digit & 127; | 71 result = digit & 127; |
70 if (digit < 128) { | 72 if (digit < 128) { |
71 *output = result; | 73 *output = result; |
72 return source; | 74 return source; |
73 } | 75 } |
74 | 76 |
75 if (source >= limit) | 77 if (source >= limit) |
(...skipping 30 matching lines...) Expand all Loading... |
106 if (digit < 128) { | 108 if (digit < 128) { |
107 *output = result; | 109 *output = result; |
108 return source; | 110 return source; |
109 } | 111 } |
110 | 112 |
111 return NULL; // Value is too long to be a Varint32. | 113 return NULL; // Value is too long to be a Varint32. |
112 } | 114 } |
113 | 115 |
114 // Write the base-128 digits in little-endian order. All except the last digit | 116 // Write the base-128 digits in little-endian order. All except the last digit |
115 // have the high bit set to indicate more digits. | 117 // have the high bit set to indicate more digits. |
116 inline uint8* Varint::Encode32(uint8* destination, uint32 value) { | 118 inline uint8_t* Varint::Encode32(uint8_t* destination, uint32_t value) { |
117 while (value >= 128) { | 119 while (value >= 128) { |
118 *(destination++) = static_cast<uint8>(value) | 128; | 120 *(destination++) = static_cast<uint8_t>(value) | 128; |
119 value = value >> 7; | 121 value = value >> 7; |
120 } | 122 } |
121 *(destination++) = static_cast<uint8>(value); | 123 *(destination++) = static_cast<uint8_t>(value); |
122 return destination; | 124 return destination; |
123 } | 125 } |
124 | 126 |
125 void SourceStream::Init(const SinkStream& sink) { | 127 void SourceStream::Init(const SinkStream& sink) { |
126 Init(sink.Buffer(), sink.Length()); | 128 Init(sink.Buffer(), sink.Length()); |
127 } | 129 } |
128 | 130 |
129 bool SourceStream::Read(void* destination, size_t count) { | 131 bool SourceStream::Read(void* destination, size_t count) { |
130 if (current_ + count > end_) | 132 if (current_ + count > end_) |
131 return false; | 133 return false; |
132 memcpy(destination, current_, count); | 134 memcpy(destination, current_, count); |
133 current_ += count; | 135 current_ += count; |
134 return true; | 136 return true; |
135 } | 137 } |
136 | 138 |
137 bool SourceStream::ReadVarint32(uint32* output_value) { | 139 bool SourceStream::ReadVarint32(uint32_t* output_value) { |
138 const uint8* after = Varint::Parse32WithLimit(current_, end_, output_value); | 140 const uint8_t* after = Varint::Parse32WithLimit(current_, end_, output_value); |
139 if (!after) | 141 if (!after) |
140 return false; | 142 return false; |
141 current_ = after; | 143 current_ = after; |
142 return true; | 144 return true; |
143 } | 145 } |
144 | 146 |
145 bool SourceStream::ReadVarint32Signed(int32* output_value) { | 147 bool SourceStream::ReadVarint32Signed(int32_t* output_value) { |
146 // Signed numbers are encoded as unsigned numbers so that numbers nearer zero | 148 // Signed numbers are encoded as unsigned numbers so that numbers nearer zero |
147 // have shorter varint encoding. | 149 // have shorter varint encoding. |
148 // 0000xxxx encoded as 000xxxx0. | 150 // 0000xxxx encoded as 000xxxx0. |
149 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx. | 151 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx. |
150 uint32 unsigned_value; | 152 uint32_t unsigned_value; |
151 if (!ReadVarint32(&unsigned_value)) | 153 if (!ReadVarint32(&unsigned_value)) |
152 return false; | 154 return false; |
153 if (unsigned_value & 1) | 155 if (unsigned_value & 1) |
154 *output_value = ~static_cast<int32>(unsigned_value >> 1); | 156 *output_value = ~static_cast<int32_t>(unsigned_value >> 1); |
155 else | 157 else |
156 *output_value = (unsigned_value >> 1); | 158 *output_value = (unsigned_value >> 1); |
157 return true; | 159 return true; |
158 } | 160 } |
159 | 161 |
160 bool SourceStream::ShareSubstream(size_t offset, size_t length, | 162 bool SourceStream::ShareSubstream(size_t offset, size_t length, |
161 SourceStream* substream) { | 163 SourceStream* substream) { |
162 if (offset > Remaining()) | 164 if (offset > Remaining()) |
163 return false; | 165 return false; |
164 if (length > Remaining() - offset) | 166 if (length > Remaining() - offset) |
(...skipping 13 matching lines...) Expand all Loading... |
178 if (current_ + byte_count > end_) | 180 if (current_ + byte_count > end_) |
179 return false; | 181 return false; |
180 current_ += byte_count; | 182 current_ += byte_count; |
181 return true; | 183 return true; |
182 } | 184 } |
183 | 185 |
184 CheckBool SinkStream::Write(const void* data, size_t byte_count) { | 186 CheckBool SinkStream::Write(const void* data, size_t byte_count) { |
185 return buffer_.append(static_cast<const char*>(data), byte_count); | 187 return buffer_.append(static_cast<const char*>(data), byte_count); |
186 } | 188 } |
187 | 189 |
188 CheckBool SinkStream::WriteVarint32(uint32 value) { | 190 CheckBool SinkStream::WriteVarint32(uint32_t value) { |
189 uint8 buffer[Varint::kMax32]; | 191 uint8_t buffer[Varint::kMax32]; |
190 uint8* end = Varint::Encode32(buffer, value); | 192 uint8_t* end = Varint::Encode32(buffer, value); |
191 return Write(buffer, end - buffer); | 193 return Write(buffer, end - buffer); |
192 } | 194 } |
193 | 195 |
194 CheckBool SinkStream::WriteVarint32Signed(int32 value) { | 196 CheckBool SinkStream::WriteVarint32Signed(int32_t value) { |
195 // Encode signed numbers so that numbers nearer zero have shorter | 197 // Encode signed numbers so that numbers nearer zero have shorter |
196 // varint encoding. | 198 // varint encoding. |
197 // 0000xxxx encoded as 000xxxx0. | 199 // 0000xxxx encoded as 000xxxx0. |
198 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx. | 200 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx. |
199 bool ret; | 201 bool ret; |
200 if (value < 0) | 202 if (value < 0) |
201 ret = WriteVarint32(~value * 2 + 1); | 203 ret = WriteVarint32(~value * 2 + 1); |
202 else | 204 else |
203 ret = WriteVarint32(value * 2); | 205 ret = WriteVarint32(value * 2); |
204 return ret; | 206 return ret; |
205 } | 207 } |
206 | 208 |
207 CheckBool SinkStream::WriteSizeVarint32(size_t value) { | 209 CheckBool SinkStream::WriteSizeVarint32(size_t value) { |
208 uint32 narrowed_value = static_cast<uint32>(value); | 210 uint32_t narrowed_value = static_cast<uint32_t>(value); |
209 // On 32-bit, the compiler should figure out this test always fails. | 211 // On 32-bit, the compiler should figure out this test always fails. |
210 LOG_ASSERT(value == narrowed_value); | 212 LOG_ASSERT(value == narrowed_value); |
211 return WriteVarint32(narrowed_value); | 213 return WriteVarint32(narrowed_value); |
212 } | 214 } |
213 | 215 |
214 CheckBool SinkStream::Append(SinkStream* other) { | 216 CheckBool SinkStream::Append(SinkStream* other) { |
215 bool ret = Write(other->buffer_.data(), other->buffer_.size()); | 217 bool ret = Write(other->buffer_.data(), other->buffer_.size()); |
216 if (ret) | 218 if (ret) |
217 other->Retire(); | 219 other->Retire(); |
218 return ret; | 220 return ret; |
(...skipping 13 matching lines...) Expand all Loading... |
232 } | 234 } |
233 | 235 |
234 | 236 |
235 // Initializes from |source|. | 237 // Initializes from |source|. |
236 // The stream set for N streams is serialized as a header | 238 // The stream set for N streams is serialized as a header |
237 // <version><N><length1><length2>...<lengthN> | 239 // <version><N><length1><length2>...<lengthN> |
238 // followed by the stream contents | 240 // followed by the stream contents |
239 // <bytes1><bytes2>...<bytesN> | 241 // <bytes1><bytes2>...<bytesN> |
240 // | 242 // |
241 bool SourceStreamSet::Init(const void* source, size_t byte_count) { | 243 bool SourceStreamSet::Init(const void* source, size_t byte_count) { |
242 const uint8* start = static_cast<const uint8*>(source); | 244 const uint8_t* start = static_cast<const uint8_t*>(source); |
243 const uint8* end = start + byte_count; | 245 const uint8_t* end = start + byte_count; |
244 | 246 |
245 unsigned int version; | 247 unsigned int version; |
246 const uint8* finger = Varint::Parse32WithLimit(start, end, &version); | 248 const uint8_t* finger = Varint::Parse32WithLimit(start, end, &version); |
247 if (finger == NULL) | 249 if (finger == NULL) |
248 return false; | 250 return false; |
249 if (version != kStreamsSerializationFormatVersion) | 251 if (version != kStreamsSerializationFormatVersion) |
250 return false; | 252 return false; |
251 | 253 |
252 unsigned int count; | 254 unsigned int count; |
253 finger = Varint::Parse32WithLimit(finger, end, &count); | 255 finger = Varint::Parse32WithLimit(finger, end, &count); |
254 if (finger == NULL) | 256 if (finger == NULL) |
255 return false; | 257 return false; |
256 if (count > kMaxStreams) | 258 if (count > kMaxStreams) |
(...skipping 23 matching lines...) Expand all Loading... |
280 | 282 |
281 return true; | 283 return true; |
282 } | 284 } |
283 | 285 |
284 bool SourceStreamSet::Init(SourceStream* source) { | 286 bool SourceStreamSet::Init(SourceStream* source) { |
285 // TODO(sra): consume the rest of |source|. | 287 // TODO(sra): consume the rest of |source|. |
286 return Init(source->Buffer(), source->Remaining()); | 288 return Init(source->Buffer(), source->Remaining()); |
287 } | 289 } |
288 | 290 |
289 bool SourceStreamSet::ReadSet(SourceStreamSet* set) { | 291 bool SourceStreamSet::ReadSet(SourceStreamSet* set) { |
290 uint32 stream_count = 0; | 292 uint32_t stream_count = 0; |
291 SourceStream* control_stream = this->stream(0); | 293 SourceStream* control_stream = this->stream(0); |
292 if (!control_stream->ReadVarint32(&stream_count)) | 294 if (!control_stream->ReadVarint32(&stream_count)) |
293 return false; | 295 return false; |
294 | 296 |
295 uint32 lengths[kMaxStreams] = {}; // i.e. all zero. | 297 uint32_t lengths[kMaxStreams] = {}; // i.e. all zero. |
296 | 298 |
297 for (size_t i = 0; i < stream_count; ++i) { | 299 for (size_t i = 0; i < stream_count; ++i) { |
298 if (!control_stream->ReadVarint32(&lengths[i])) | 300 if (!control_stream->ReadVarint32(&lengths[i])) |
299 return false; | 301 return false; |
300 } | 302 } |
301 | 303 |
302 for (size_t i = 0; i < stream_count; ++i) { | 304 for (size_t i = 0; i < stream_count; ++i) { |
303 if (!this->stream(i)->ReadSubstream(lengths[i], set->stream(i))) | 305 if (!this->stream(i)->ReadSubstream(lengths[i], set->stream(i))) |
304 return false; | 306 return false; |
305 } | 307 } |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
357 if (ret) { | 359 if (ret) { |
358 ret = combined_stream->Append(&header); | 360 ret = combined_stream->Append(&header); |
359 for (size_t i = 0; ret && i < count_; ++i) { | 361 for (size_t i = 0; ret && i < count_; ++i) { |
360 ret = combined_stream->Append(stream(i)); | 362 ret = combined_stream->Append(stream(i)); |
361 } | 363 } |
362 } | 364 } |
363 return ret; | 365 return ret; |
364 } | 366 } |
365 | 367 |
366 CheckBool SinkStreamSet::WriteSet(SinkStreamSet* set) { | 368 CheckBool SinkStreamSet::WriteSet(SinkStreamSet* set) { |
367 uint32 lengths[kMaxStreams]; | 369 uint32_t lengths[kMaxStreams]; |
368 // 'stream_count' includes all non-empty streams and all empty stream numbered | 370 // 'stream_count' includes all non-empty streams and all empty stream numbered |
369 // lower than a non-empty stream. | 371 // lower than a non-empty stream. |
370 size_t stream_count = 0; | 372 size_t stream_count = 0; |
371 for (size_t i = 0; i < kMaxStreams; ++i) { | 373 for (size_t i = 0; i < kMaxStreams; ++i) { |
372 SinkStream* stream = set->stream(i); | 374 SinkStream* stream = set->stream(i); |
373 lengths[i] = static_cast<uint32>(stream->Length()); | 375 lengths[i] = static_cast<uint32_t>(stream->Length()); |
374 if (lengths[i] > 0) | 376 if (lengths[i] > 0) |
375 stream_count = i + 1; | 377 stream_count = i + 1; |
376 } | 378 } |
377 | 379 |
378 SinkStream* control_stream = this->stream(0); | 380 SinkStream* control_stream = this->stream(0); |
379 bool ret = control_stream->WriteSizeVarint32(stream_count); | 381 bool ret = control_stream->WriteSizeVarint32(stream_count); |
380 for (size_t i = 0; ret && i < stream_count; ++i) { | 382 for (size_t i = 0; ret && i < stream_count; ++i) { |
381 ret = control_stream->WriteSizeVarint32(lengths[i]); | 383 ret = control_stream->WriteSizeVarint32(lengths[i]); |
382 } | 384 } |
383 | 385 |
384 for (size_t i = 0; ret && i < stream_count; ++i) { | 386 for (size_t i = 0; ret && i < stream_count; ++i) { |
385 ret = this->stream(i)->Append(set->stream(i)); | 387 ret = this->stream(i)->Append(set->stream(i)); |
386 } | 388 } |
387 return ret; | 389 return ret; |
388 } | 390 } |
389 | 391 |
390 } // namespace | 392 } // namespace |
OLD | NEW |