OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 // Streams classes. |
| 6 // |
| 7 // These memory-resident streams are used for serializing data into a sequential |
| 8 // region of memory. |
| 9 // |
| 10 // Streams are divided into SourceStreams for reading and SinkStreams for |
| 11 // writing. Streams are aggregated into Sets which allows several streams to be |
| 12 // used at once. Example: we can write A1, B1, A2, B2 but achieve the memory |
| 13 // layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another. |
| 14 // |
| 15 // The aggregated streams are important to Courgette's compression efficiency, |
| 16 // we use it to cluster similar kinds of data which helps to generate longer |
| 17 // common subsequences and repeated sequences. |
| 18 |
| 19 #include "courgette/streams.h" |
| 20 |
| 21 #include <io.h> |
| 22 #include <memory.h> |
| 23 |
| 24 #include "base/basictypes.h" |
| 25 |
| 26 namespace courgette { |
| 27 |
| 28 // Update this version number if the serialization format of a StreamSet |
| 29 // changes. |
| 30 static const unsigned int kStreamsSerializationFormatVersion = 20090218; |
| 31 |
| 32 // |
| 33 // This is a cut down Varint implementation, implementing only what we use for |
| 34 // streams. |
| 35 // |
| 36 class Varint { |
| 37 public: |
| 38 // Maximum lengths of varint encoding of uint32 |
| 39 static const int kMax32 = 5; |
| 40 |
| 41 // Parses a Varint32 encoded value from |source| and stores it in |output|, |
| 42 // and returns a pointer to the following byte. Returns NULL if a valid |
| 43 // varint value was not found before |limit|. |
| 44 static const uint8* Parse32WithLimit(const uint8* source, const uint8* limit, |
| 45 uint32* output); |
| 46 |
| 47 // Writes the Varint32 encoded representation of |value| to buffer |
| 48 // |destination|. |destination| must have sufficient length to hold kMax32 |
| 49 // bytes. Returns a pointer to the byte just past the last encoded byte. |
| 50 static uint8* Encode32(uint8* destination, uint32 value); |
| 51 }; |
| 52 |
| 53 // Parses a Varint32 encoded unsigned number from |source|. The Varint32 |
| 54 // encoding is a little-endian sequence of bytes containing base-128 digits, |
| 55 // with the high order bit set to indicate if there are more digits. |
| 56 // |
| 57 // For each byte, we mask out the digit and 'or' it into the right place in the |
| 58 // result. |
| 59 // |
| 60 // The digit loop is unrolled for performance. It usually exits after the first |
| 61 // one or two digits. |
| 62 const uint8* Varint::Parse32WithLimit(const uint8* source, |
| 63 const uint8* limit, |
| 64 uint32* output) { |
| 65 uint32 digit, result; |
| 66 if (source >= limit) |
| 67 return NULL; |
| 68 digit = *(source++); |
| 69 result = digit & 127; |
| 70 if (digit < 128) { |
| 71 *output = result; |
| 72 return source; |
| 73 } |
| 74 |
| 75 if (source >= limit) |
| 76 return NULL; |
| 77 digit = *(source++); |
| 78 result |= (digit & 127) << 7; |
| 79 if (digit < 128) { |
| 80 *output = result; |
| 81 return source; |
| 82 } |
| 83 |
| 84 if (source >= limit) |
| 85 return NULL; |
| 86 digit = *(source++); |
| 87 result |= (digit & 127) << 14; |
| 88 if (digit < 128) { |
| 89 *output = result; |
| 90 return source; |
| 91 } |
| 92 |
| 93 if (source >= limit) |
| 94 return NULL; |
| 95 digit = *(source++); |
| 96 result |= (digit & 127) << 21; |
| 97 if (digit < 128) { |
| 98 *output = result; |
| 99 return source; |
| 100 } |
| 101 |
| 102 if (source >= limit) |
| 103 return NULL; |
| 104 digit = *(source++); |
| 105 result |= (digit & 127) << 28; |
| 106 if (digit < 128) { |
| 107 *output = result; |
| 108 return source; |
| 109 } |
| 110 |
| 111 return NULL; // Value is too long to be a Varint32. |
| 112 } |
| 113 |
| 114 // Write the base-128 digits in little-endian order. All except the last digit |
| 115 // have the high bit set to indicate more digits. |
| 116 inline uint8* Varint::Encode32(uint8* destination, uint32 value) { |
| 117 while (value >= 128) { |
| 118 *(destination++) = value | 128; |
| 119 value = value >> 7; |
| 120 } |
| 121 *(destination++) = value; |
| 122 return destination; |
| 123 } |
| 124 |
| 125 void SourceStream::Init(const SinkStream& sink) { |
| 126 Init(sink.Buffer(), sink.Length()); |
| 127 } |
| 128 |
| 129 bool SourceStream::Read(void* destination, size_t count) { |
| 130 if (current_ + count > end_) |
| 131 return false; |
| 132 memcpy(destination, current_, count); |
| 133 current_ += count; |
| 134 return true; |
| 135 } |
| 136 |
| 137 bool SourceStream::ReadVarint32(uint32* output_value) { |
| 138 const uint8* after = Varint::Parse32WithLimit(current_, end_, output_value); |
| 139 if (!after) |
| 140 return false; |
| 141 current_ = after; |
| 142 return true; |
| 143 } |
| 144 |
| 145 bool SourceStream::ReadVarint32Signed(int32* output_value) { |
| 146 // Signed numbers are encoded as unsigned numbers so that numbers nearer zero |
| 147 // have shorter varint encoding. |
| 148 // 0000xxxx encoded as 000xxxx0. |
| 149 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx. |
| 150 uint32 unsigned_value; |
| 151 if (!ReadVarint32(&unsigned_value)) |
| 152 return false; |
| 153 if (unsigned_value & 1) |
| 154 *output_value = ~static_cast<int32>(unsigned_value >> 1); |
| 155 else |
| 156 *output_value = (unsigned_value >> 1); |
| 157 return true; |
| 158 } |
| 159 |
| 160 bool SourceStream::ShareSubstream(size_t offset, size_t length, |
| 161 SourceStream* substream) { |
| 162 if (offset > Remaining()) |
| 163 return false; |
| 164 if (length > Remaining() - offset) |
| 165 return false; |
| 166 substream->Init(current_ + offset, length); |
| 167 return true; |
| 168 } |
| 169 |
| 170 bool SourceStream::ReadSubstream(size_t length, SourceStream* substream) { |
| 171 if (!ShareSubstream(0, length, substream)) |
| 172 return false; |
| 173 current_ += length; |
| 174 return true; |
| 175 } |
| 176 |
| 177 bool SourceStream::Skip(size_t byte_count) { |
| 178 if (current_ + byte_count > end_) |
| 179 return false; |
| 180 current_ += byte_count; |
| 181 return true; |
| 182 } |
| 183 |
| 184 void SinkStream::Write(const void* data, size_t byte_count) { |
| 185 buffer_.append(static_cast<const char*>(data), byte_count); |
| 186 } |
| 187 |
| 188 void SinkStream::WriteVarint32(uint32 value) { |
| 189 uint8 buffer[Varint::kMax32]; |
| 190 uint8* end = Varint::Encode32(buffer, value); |
| 191 Write(buffer, end - buffer); |
| 192 } |
| 193 |
| 194 void SinkStream::WriteVarint32Signed(int32 value) { |
| 195 // Encode signed numbers so that numbers nearer zero have shorter |
| 196 // varint encoding. |
| 197 // 0000xxxx encoded as 000xxxx0. |
| 198 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx. |
| 199 if (value < 0) |
| 200 WriteVarint32(~value * 2 + 1); |
| 201 else |
| 202 WriteVarint32(value * 2); |
| 203 } |
| 204 |
| 205 void SinkStream::Append(SinkStream* other) { |
| 206 Write(other->buffer_.c_str(), other->buffer_.size()); |
| 207 other->buffer_.clear(); |
| 208 other->buffer_.reserve(0); // Non-binding request to reduce storage. |
| 209 } |
| 210 |
| 211 //////////////////////////////////////////////////////////////////////////////// |
| 212 |
| 213 SourceStreamSet::SourceStreamSet() |
| 214 : count_(kMaxStreams) { |
| 215 } |
| 216 |
| 217 SourceStreamSet::~SourceStreamSet() { |
| 218 } |
| 219 |
| 220 |
| 221 // Initializes from |source|. |
| 222 // The stream set for N streams is serialized as a header |
| 223 // <version><N><length1><length2>...<lengthN> |
| 224 // followed by the stream contents |
| 225 // <bytes1><bytes2>...<bytesN> |
| 226 // |
| 227 bool SourceStreamSet::Init(const void* source, size_t byte_count) { |
| 228 const uint8* start = static_cast<const uint8*>(source); |
| 229 const uint8* end = start + byte_count; |
| 230 |
| 231 unsigned int version; |
| 232 const uint8* finger = Varint::Parse32WithLimit(start, end, &version); |
| 233 if (finger == NULL) |
| 234 return false; |
| 235 if (version != kStreamsSerializationFormatVersion) |
| 236 return false; |
| 237 |
| 238 unsigned int count; |
| 239 finger = Varint::Parse32WithLimit(finger, end, &count); |
| 240 if (finger == NULL) |
| 241 return false; |
| 242 if (count > kMaxStreams) |
| 243 return false; |
| 244 |
| 245 count_ = count; |
| 246 |
| 247 unsigned int lengths[kMaxStreams]; |
| 248 size_t accumulated_length = 0; |
| 249 |
| 250 for (size_t i = 0; i < count_; ++i) { |
| 251 finger = Varint::Parse32WithLimit(finger, end, &lengths[i]); |
| 252 if (finger == NULL) |
| 253 return false; |
| 254 accumulated_length += lengths[i]; |
| 255 } |
| 256 |
| 257 // Remaining bytes should add up to sum of lengths. |
| 258 if (end - finger != accumulated_length) |
| 259 return false; |
| 260 |
| 261 accumulated_length = finger - start; |
| 262 for (size_t i = 0; i < count_; ++i) { |
| 263 stream(i)->Init(start + accumulated_length, lengths[i]); |
| 264 accumulated_length += lengths[i]; |
| 265 } |
| 266 |
| 267 return true; |
| 268 } |
| 269 |
| 270 bool SourceStreamSet::Init(SourceStream* source) { |
| 271 // TODO(sra): consume the rest of |source|. |
| 272 return Init(source->Buffer(), source->Remaining()); |
| 273 } |
| 274 |
| 275 bool SourceStreamSet::ReadSet(SourceStreamSet* set) { |
| 276 uint32 stream_count = 0; |
| 277 SourceStream* control_stream = this->stream(0); |
| 278 if (!control_stream->ReadVarint32(&stream_count)) |
| 279 return false; |
| 280 |
| 281 uint32 lengths[kMaxStreams] = {}; // i.e. all zero. |
| 282 |
| 283 for (size_t i = 0; i < stream_count; ++i) { |
| 284 if (!control_stream->ReadVarint32(&lengths[i])) |
| 285 return false; |
| 286 } |
| 287 |
| 288 for (size_t i = 0; i < stream_count; ++i) { |
| 289 if (!this->stream(i)->ReadSubstream(lengths[i], set->stream(i))) |
| 290 return false; |
| 291 } |
| 292 return true; |
| 293 } |
| 294 |
| 295 bool SourceStreamSet::Empty() const { |
| 296 for (size_t i = 0; i < count_; ++i) { |
| 297 if (streams_[i].Remaining() != 0) |
| 298 return false; |
| 299 } |
| 300 return true; |
| 301 } |
| 302 |
| 303 //////////////////////////////////////////////////////////////////////////////// |
| 304 |
| 305 SinkStreamSet::SinkStreamSet() |
| 306 : count_(kMaxStreams) { |
| 307 } |
| 308 |
| 309 SinkStreamSet::~SinkStreamSet() { |
| 310 } |
| 311 |
| 312 void SinkStreamSet::Init(size_t stream_index_limit) { |
| 313 count_ = stream_index_limit; |
| 314 } |
| 315 |
| 316 // The header for a stream set for N streams is serialized as |
| 317 // <version><N><length1><length2>...<lengthN> |
| 318 void SinkStreamSet::CopyHeaderTo(SinkStream* header) { |
| 319 header->WriteVarint32(kStreamsSerializationFormatVersion); |
| 320 header->WriteVarint32(count_); |
| 321 for (size_t i = 0; i < count_; ++i) { |
| 322 header->WriteVarint32(stream(i)->Length()); |
| 323 } |
| 324 } |
| 325 |
| 326 // Writes |this| to |combined_stream|. See SourceStreamSet::Init for the layout |
| 327 // of the stream metadata and contents. |
| 328 bool SinkStreamSet::CopyTo(SinkStream *combined_stream) { |
| 329 SinkStream header; |
| 330 CopyHeaderTo(&header); |
| 331 combined_stream->Append(&header); |
| 332 for (size_t i = 0; i < count_; ++i) { |
| 333 combined_stream->Append(stream(i)); |
| 334 } |
| 335 return true; |
| 336 } |
| 337 |
| 338 namespace { |
| 339 bool Write(int file_descriptor, SinkStream* sink) { |
| 340 size_t length = sink->Length(); |
| 341 const void *buffer = sink->Buffer(); |
| 342 int bytes_written = _write(file_descriptor, buffer, length); |
| 343 return bytes_written == length; |
| 344 } |
| 345 } |
| 346 |
| 347 bool SinkStreamSet::CopyToFileDescriptor(int file_descriptor) { |
| 348 SinkStream header; |
| 349 CopyHeaderTo(&header); |
| 350 if (!Write(file_descriptor, &header)) |
| 351 return false; |
| 352 for (size_t i = 0; i < count_; ++i) { |
| 353 if (!Write(file_descriptor, stream(i))) |
| 354 return false; |
| 355 } |
| 356 return true; |
| 357 } |
| 358 |
| 359 bool SinkStreamSet::WriteSet(SinkStreamSet* set) { |
| 360 uint32 lengths[kMaxStreams]; |
| 361 // 'stream_count' includes all non-empty streams and all empty stream numbered |
| 362 // lower than a non-empty stream. |
| 363 size_t stream_count = 0; |
| 364 for (size_t i = 0; i < kMaxStreams; ++i) { |
| 365 SinkStream* stream = set->stream(i); |
| 366 lengths[i] = stream->Length(); |
| 367 if (lengths[i] > 0) |
| 368 stream_count = i + 1; |
| 369 } |
| 370 |
| 371 SinkStream* control_stream = this->stream(0); |
| 372 control_stream->WriteVarint32(stream_count); |
| 373 for (size_t i = 0; i < stream_count; ++i) { |
| 374 control_stream->WriteVarint32(lengths[i]); |
| 375 } |
| 376 |
| 377 for (size_t i = 0; i < stream_count; ++i) { |
| 378 this->stream(i)->Append(set->stream(i)); |
| 379 } |
| 380 return true; |
| 381 } |
| 382 |
| 383 } // namespace |
OLD | NEW |