| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // Streams classes. | |
| 6 // | |
| 7 // These memory-resident streams are used for serializing data into a sequential | |
| 8 // region of memory. | |
| 9 // | |
| 10 // Streams are divided into SourceStreams for reading and SinkStreams for | |
| 11 // writing. Streams are aggregated into Sets which allows several streams to be | |
| 12 // used at once. Example: we can write A1, B1, A2, B2 but achieve the memory | |
| 13 // layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another. | |
| 14 // | |
| 15 // The aggregated streams are important to Courgette's compression efficiency, | |
| 16 // we use it to cluster similar kinds of data which helps to generate longer | |
| 17 // common subsequences and repeated sequences. | |
| 18 | |
| 19 #include "third_party/courgette/streams.h" | |
| 20 | |
| 21 #include <io.h> | |
| 22 #include <memory.h> | |
| 23 | |
| 24 #include "base/basictypes.h" | |
| 25 | |
| 26 namespace courgette { | |
| 27 | |
| 28 // Update this version number if the serialization format of a StreamSet | |
| 29 // changes. | |
| 30 static const unsigned int kStreamsSerializationFormatVersion = 20090218; | |
| 31 | |
| 32 // | |
| 33 // This is a cut down Varint implementation, implementing only what we use for | |
| 34 // streams. The original implementation is at google3/util/coding/varint.h | |
| 35 // | |
| 36 class Varint { | |
| 37 public: | |
| 38 // Maximum lengths of varint encoding of uint32 | |
| 39 static const int kMax32 = 5; | |
| 40 | |
| 41 // Parses a Varint32 encoded value from |source| and stores it in |output|, | |
| 42 // and returns a pointer to the following byte. Returns NULL if a valid | |
| 43 // varint value was not found before |limit|. | |
| 44 static const uint8* Parse32WithLimit(const uint8* source, const uint8* limit, | |
| 45 uint32* output); | |
| 46 | |
| 47 // Writes the Varint32 encoded representation of |value| to buffer | |
| 48 // |destination|. |destination| must have sufficient length to hold kMax32 | |
| 49 // bytes. Returns a pointer to the byte just past the last encoded byte. | |
| 50 static uint8* Encode32(uint8* destination, uint32 value); | |
| 51 }; | |
| 52 | |
| 53 // Parses a Varint32 encoded unsigned number from |source|. The Varint32 | |
| 54 // encoding is a little-endian sequence of bytes containing base-128 digits, | |
| 55 // with the high order bit set to indicate if there are more digits. | |
| 56 // | |
| 57 // For each byte, we mask out the digit and 'or' it into the right place in the | |
| 58 // result. | |
| 59 // | |
| 60 // The digit loop is unrolled for performance. It usually exits after the first | |
| 61 // one or two digits. | |
| 62 const uint8* Varint::Parse32WithLimit(const uint8* source, | |
| 63 const uint8* limit, | |
| 64 uint32* output) { | |
| 65 uint32 digit, result; | |
| 66 if (source >= limit) | |
| 67 return NULL; | |
| 68 digit = *(source++); | |
| 69 result = digit & 127; | |
| 70 if (digit < 128) { | |
| 71 *output = result; | |
| 72 return source; | |
| 73 } | |
| 74 | |
| 75 if (source >= limit) | |
| 76 return NULL; | |
| 77 digit = *(source++); | |
| 78 result |= (digit & 127) << 7; | |
| 79 if (digit < 128) { | |
| 80 *output = result; | |
| 81 return source; | |
| 82 } | |
| 83 | |
| 84 if (source >= limit) | |
| 85 return NULL; | |
| 86 digit = *(source++); | |
| 87 result |= (digit & 127) << 14; | |
| 88 if (digit < 128) { | |
| 89 *output = result; | |
| 90 return source; | |
| 91 } | |
| 92 | |
| 93 if (source >= limit) | |
| 94 return NULL; | |
| 95 digit = *(source++); | |
| 96 result |= (digit & 127) << 21; | |
| 97 if (digit < 128) { | |
| 98 *output = result; | |
| 99 return source; | |
| 100 } | |
| 101 | |
| 102 if (source >= limit) | |
| 103 return NULL; | |
| 104 digit = *(source++); | |
| 105 result |= (digit & 127) << 28; | |
| 106 if (digit < 128) { | |
| 107 *output = result; | |
| 108 return source; | |
| 109 } | |
| 110 | |
| 111 return NULL; // Value is too long to be a Varint32. | |
| 112 } | |
| 113 | |
| 114 // Write the base-128 digits in little-endian order. All except the last digit | |
| 115 // have the high bit set to indicate more digits. | |
| 116 inline uint8* Varint::Encode32(uint8* destination, uint32 value) { | |
| 117 while (value >= 128) { | |
| 118 *(destination++) = value | 128; | |
| 119 value = value >> 7; | |
| 120 } | |
| 121 *(destination++) = value; | |
| 122 return destination; | |
| 123 } | |
| 124 | |
| 125 void SourceStream::Init(const SinkStream& sink) { | |
| 126 Init(sink.Buffer(), sink.Length()); | |
| 127 } | |
| 128 | |
| 129 bool SourceStream::Read(void* destination, size_t count) { | |
| 130 if (current_ + count > end_) | |
| 131 return false; | |
| 132 memcpy(destination, current_, count); | |
| 133 current_ += count; | |
| 134 return true; | |
| 135 } | |
| 136 | |
| 137 bool SourceStream::ReadVarint32(uint32* output_value) { | |
| 138 const uint8* after = Varint::Parse32WithLimit(current_, end_, output_value); | |
| 139 if (!after) | |
| 140 return false; | |
| 141 current_ = after; | |
| 142 return true; | |
| 143 } | |
| 144 | |
| 145 bool SourceStream::ReadVarint32Signed(int32* output_value) { | |
| 146 // Signed numbers are encoded as unsigned numbers so that numbers nearer zero | |
| 147 // have shorter varint encoding. | |
| 148 // 0000xxxx encoded as 000xxxx0. | |
| 149 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx. | |
| 150 uint32 unsigned_value; | |
| 151 if (!ReadVarint32(&unsigned_value)) | |
| 152 return false; | |
| 153 if (unsigned_value & 1) | |
| 154 *output_value = ~static_cast<int32>(unsigned_value >> 1); | |
| 155 else | |
| 156 *output_value = (unsigned_value >> 1); | |
| 157 return true; | |
| 158 } | |
| 159 | |
| 160 bool SourceStream::ShareSubstream(size_t offset, size_t length, | |
| 161 SourceStream* substream) { | |
| 162 if (offset > Remaining()) | |
| 163 return false; | |
| 164 if (length > Remaining() - offset) | |
| 165 return false; | |
| 166 substream->Init(current_ + offset, length); | |
| 167 return true; | |
| 168 } | |
| 169 | |
| 170 bool SourceStream::ReadSubstream(size_t length, SourceStream* substream) { | |
| 171 if (!ShareSubstream(0, length, substream)) | |
| 172 return false; | |
| 173 current_ += length; | |
| 174 return true; | |
| 175 } | |
| 176 | |
| 177 bool SourceStream::Skip(size_t byte_count) { | |
| 178 if (current_ + byte_count > end_) | |
| 179 return false; | |
| 180 current_ += byte_count; | |
| 181 return true; | |
| 182 } | |
| 183 | |
| 184 void SinkStream::Write(const void* data, size_t byte_count) { | |
| 185 buffer_.append(static_cast<const char*>(data), byte_count); | |
| 186 } | |
| 187 | |
| 188 void SinkStream::WriteVarint32(uint32 value) { | |
| 189 uint8 buffer[Varint::kMax32]; | |
| 190 uint8* end = Varint::Encode32(buffer, value); | |
| 191 Write(buffer, end - buffer); | |
| 192 } | |
| 193 | |
| 194 void SinkStream::WriteVarint32Signed(int32 value) { | |
| 195 // Encode signed numbers so that numbers nearer zero have shorter | |
| 196 // varint encoding. | |
| 197 // 0000xxxx encoded as 000xxxx0. | |
| 198 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx. | |
| 199 if (value < 0) | |
| 200 WriteVarint32(~value * 2 + 1); | |
| 201 else | |
| 202 WriteVarint32(value * 2); | |
| 203 } | |
| 204 | |
| 205 void SinkStream::Append(SinkStream* other) { | |
| 206 Write(other->buffer_.c_str(), other->buffer_.size()); | |
| 207 other->buffer_.clear(); | |
| 208 other->buffer_.reserve(0); // Non-binding request to reduce storage. | |
| 209 } | |
| 210 | |
| 211 //////////////////////////////////////////////////////////////////////////////// | |
| 212 | |
| 213 SourceStreamSet::SourceStreamSet() | |
| 214 : count_(kMaxStreams) { | |
| 215 } | |
| 216 | |
| 217 SourceStreamSet::~SourceStreamSet() { | |
| 218 } | |
| 219 | |
| 220 | |
| 221 // Initializes from |source|. | |
| 222 // The stream set for N streams is serialized as a header | |
| 223 // <version><N><length1><length2>...<lengthN> | |
| 224 // followed by the stream contents | |
| 225 // <bytes1><bytes2>...<bytesN> | |
| 226 // | |
| 227 bool SourceStreamSet::Init(const void* source, size_t byte_count) { | |
| 228 const uint8* start = static_cast<const uint8*>(source); | |
| 229 const uint8* end = start + byte_count; | |
| 230 | |
| 231 unsigned int version; | |
| 232 const uint8* finger = Varint::Parse32WithLimit(start, end, &version); | |
| 233 if (finger == NULL) | |
| 234 return false; | |
| 235 if (version != kStreamsSerializationFormatVersion) | |
| 236 return false; | |
| 237 | |
| 238 unsigned int count; | |
| 239 finger = Varint::Parse32WithLimit(finger, end, &count); | |
| 240 if (finger == NULL) | |
| 241 return false; | |
| 242 if (count > kMaxStreams) | |
| 243 return false; | |
| 244 | |
| 245 count_ = count; | |
| 246 | |
| 247 unsigned int lengths[kMaxStreams]; | |
| 248 size_t accumulated_length = 0; | |
| 249 | |
| 250 for (size_t i = 0; i < count_; ++i) { | |
| 251 finger = Varint::Parse32WithLimit(finger, end, &lengths[i]); | |
| 252 if (finger == NULL) | |
| 253 return false; | |
| 254 accumulated_length += lengths[i]; | |
| 255 } | |
| 256 | |
| 257 // Remaining bytes should add up to sum of lengths. | |
| 258 if (end - finger != accumulated_length) | |
| 259 return false; | |
| 260 | |
| 261 accumulated_length = finger - start; | |
| 262 for (size_t i = 0; i < count_; ++i) { | |
| 263 stream(i)->Init(start + accumulated_length, lengths[i]); | |
| 264 accumulated_length += lengths[i]; | |
| 265 } | |
| 266 | |
| 267 return true; | |
| 268 } | |
| 269 | |
| 270 bool SourceStreamSet::Init(SourceStream* source) { | |
| 271 // TODO(sra): consume the rest of |source|. | |
| 272 return Init(source->Buffer(), source->Remaining()); | |
| 273 } | |
| 274 | |
| 275 bool SourceStreamSet::ReadSet(SourceStreamSet* set) { | |
| 276 uint32 stream_count = 0; | |
| 277 SourceStream* control_stream = this->stream(0); | |
| 278 if (!control_stream->ReadVarint32(&stream_count)) | |
| 279 return false; | |
| 280 | |
| 281 uint32 lengths[kMaxStreams] = {}; // i.e. all zero. | |
| 282 | |
| 283 for (size_t i = 0; i < stream_count; ++i) { | |
| 284 if (!control_stream->ReadVarint32(&lengths[i])) | |
| 285 return false; | |
| 286 } | |
| 287 | |
| 288 for (size_t i = 0; i < stream_count; ++i) { | |
| 289 if (!this->stream(i)->ReadSubstream(lengths[i], set->stream(i))) | |
| 290 return false; | |
| 291 } | |
| 292 return true; | |
| 293 } | |
| 294 | |
| 295 bool SourceStreamSet::Empty() const { | |
| 296 for (size_t i = 0; i < count_; ++i) { | |
| 297 if (streams_[i].Remaining() != 0) | |
| 298 return false; | |
| 299 } | |
| 300 return true; | |
| 301 } | |
| 302 | |
| 303 //////////////////////////////////////////////////////////////////////////////// | |
| 304 | |
| 305 SinkStreamSet::SinkStreamSet() | |
| 306 : count_(kMaxStreams) { | |
| 307 } | |
| 308 | |
| 309 SinkStreamSet::~SinkStreamSet() { | |
| 310 } | |
| 311 | |
| 312 void SinkStreamSet::Init(size_t stream_index_limit) { | |
| 313 count_ = stream_index_limit; | |
| 314 } | |
| 315 | |
| 316 // The header for a stream set for N streams is serialized as | |
| 317 // <version><N><length1><length2>...<lengthN> | |
| 318 void SinkStreamSet::CopyHeaderTo(SinkStream* header) { | |
| 319 header->WriteVarint32(kStreamsSerializationFormatVersion); | |
| 320 header->WriteVarint32(count_); | |
| 321 for (size_t i = 0; i < count_; ++i) { | |
| 322 header->WriteVarint32(stream(i)->Length()); | |
| 323 } | |
| 324 } | |
| 325 | |
| 326 // Writes |this| to |combined_stream|. See SourceStreamSet::Init for the layout | |
| 327 // of the stream metadata and contents. | |
| 328 bool SinkStreamSet::CopyTo(SinkStream *combined_stream) { | |
| 329 SinkStream header; | |
| 330 CopyHeaderTo(&header); | |
| 331 combined_stream->Append(&header); | |
| 332 for (size_t i = 0; i < count_; ++i) { | |
| 333 combined_stream->Append(stream(i)); | |
| 334 } | |
| 335 return true; | |
| 336 } | |
| 337 | |
| 338 namespace { | |
| 339 bool Write(int file_descriptor, SinkStream* sink) { | |
| 340 size_t length = sink->Length(); | |
| 341 const void *buffer = sink->Buffer(); | |
| 342 int bytes_written = _write(file_descriptor, buffer, length); | |
| 343 return bytes_written == length; | |
| 344 } | |
| 345 } | |
| 346 | |
| 347 bool SinkStreamSet::CopyToFileDescriptor(int file_descriptor) { | |
| 348 SinkStream header; | |
| 349 CopyHeaderTo(&header); | |
| 350 if (!Write(file_descriptor, &header)) | |
| 351 return false; | |
| 352 for (size_t i = 0; i < count_; ++i) { | |
| 353 if (!Write(file_descriptor, stream(i))) | |
| 354 return false; | |
| 355 } | |
| 356 return true; | |
| 357 } | |
| 358 | |
| 359 bool SinkStreamSet::WriteSet(SinkStreamSet* set) { | |
| 360 uint32 lengths[kMaxStreams]; | |
| 361 // 'stream_count' includes all non-empty streams and all empty stream numbered | |
| 362 // lower than a non-empty stream. | |
| 363 size_t stream_count = 0; | |
| 364 for (size_t i = 0; i < kMaxStreams; ++i) { | |
| 365 SinkStream* stream = set->stream(i); | |
| 366 lengths[i] = stream->Length(); | |
| 367 if (lengths[i] > 0) | |
| 368 stream_count = i + 1; | |
| 369 } | |
| 370 | |
| 371 SinkStream* control_stream = this->stream(0); | |
| 372 control_stream->WriteVarint32(stream_count); | |
| 373 for (size_t i = 0; i < stream_count; ++i) { | |
| 374 control_stream->WriteVarint32(lengths[i]); | |
| 375 } | |
| 376 | |
| 377 for (size_t i = 0; i < stream_count; ++i) { | |
| 378 this->stream(i)->Append(set->stream(i)); | |
| 379 } | |
| 380 return true; | |
| 381 } | |
| 382 | |
| 383 } // namespace | |
| OLD | NEW |