| Index: third_party/courgette/streams.cc
|
| ===================================================================
|
| --- third_party/courgette/streams.cc (revision 15668)
|
| +++ third_party/courgette/streams.cc (working copy)
|
| @@ -1,383 +0,0 @@
|
| -// Copyright (c) 2009 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -// Streams classes.
|
| -//
|
| -// These memory-resident streams are used for serializing data into a sequential
|
| -// region of memory.
|
| -//
|
| -// Streams are divided into SourceStreams for reading and SinkStreams for
|
| -// writing. Streams are aggregated into Sets which allows several streams to be
|
| -// used at once. Example: we can write A1, B1, A2, B2 but achieve the memory
|
| -// layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another.
|
| -//
|
| -// The aggregated streams are important to Courgette's compression efficiency,
|
| -// we use it to cluster similar kinds of data which helps to generate longer
|
| -// common subsequences and repeated sequences.
|
| -
|
| -#include "third_party/courgette/streams.h"
|
| -
|
| -#include <io.h>
|
| -#include <memory.h>
|
| -
|
| -#include "base/basictypes.h"
|
| -
|
| -namespace courgette {
|
| -
|
| -// Update this version number if the serialization format of a StreamSet
|
| -// changes.
|
| -static const unsigned int kStreamsSerializationFormatVersion = 20090218;
|
| -
|
| -//
|
| -// This is a cut down Varint implementation, implementing only what we use for
|
| -// streams. The original implementation is at google3/util/coding/varint.h
|
| -//
|
| -class Varint {
|
| - public:
|
| - // Maximum lengths of varint encoding of uint32
|
| - static const int kMax32 = 5;
|
| -
|
| - // Parses a Varint32 encoded value from |source| and stores it in |output|,
|
| - // and returns a pointer to the following byte. Returns NULL if a valid
|
| - // varint value was not found before |limit|.
|
| - static const uint8* Parse32WithLimit(const uint8* source, const uint8* limit,
|
| - uint32* output);
|
| -
|
| - // Writes the Varint32 encoded representation of |value| to buffer
|
| - // |destination|. |destination| must have sufficient length to hold kMax32
|
| - // bytes. Returns a pointer to the byte just past the last encoded byte.
|
| - static uint8* Encode32(uint8* destination, uint32 value);
|
| -};
|
| -
|
| -// Parses a Varint32 encoded unsigned number from |source|. The Varint32
|
| -// encoding is a little-endian sequence of bytes containing base-128 digits,
|
| -// with the high order bit set to indicate if there are more digits.
|
| -//
|
| -// For each byte, we mask out the digit and 'or' it into the right place in the
|
| -// result.
|
| -//
|
| -// The digit loop is unrolled for performance. It usually exits after the first
|
| -// one or two digits.
|
| -const uint8* Varint::Parse32WithLimit(const uint8* source,
|
| - const uint8* limit,
|
| - uint32* output) {
|
| - uint32 digit, result;
|
| - if (source >= limit)
|
| - return NULL;
|
| - digit = *(source++);
|
| - result = digit & 127;
|
| - if (digit < 128) {
|
| - *output = result;
|
| - return source;
|
| - }
|
| -
|
| - if (source >= limit)
|
| - return NULL;
|
| - digit = *(source++);
|
| - result |= (digit & 127) << 7;
|
| - if (digit < 128) {
|
| - *output = result;
|
| - return source;
|
| - }
|
| -
|
| - if (source >= limit)
|
| - return NULL;
|
| - digit = *(source++);
|
| - result |= (digit & 127) << 14;
|
| - if (digit < 128) {
|
| - *output = result;
|
| - return source;
|
| - }
|
| -
|
| - if (source >= limit)
|
| - return NULL;
|
| - digit = *(source++);
|
| - result |= (digit & 127) << 21;
|
| - if (digit < 128) {
|
| - *output = result;
|
| - return source;
|
| - }
|
| -
|
| - if (source >= limit)
|
| - return NULL;
|
| - digit = *(source++);
|
| - result |= (digit & 127) << 28;
|
| - if (digit < 128) {
|
| - *output = result;
|
| - return source;
|
| - }
|
| -
|
| - return NULL; // Value is too long to be a Varint32.
|
| -}
|
| -
|
| -// Write the base-128 digits in little-endian order. All except the last digit
|
| -// have the high bit set to indicate more digits.
|
| -inline uint8* Varint::Encode32(uint8* destination, uint32 value) {
|
| - while (value >= 128) {
|
| - *(destination++) = value | 128;
|
| - value = value >> 7;
|
| - }
|
| - *(destination++) = value;
|
| - return destination;
|
| -}
|
| -
|
| -void SourceStream::Init(const SinkStream& sink) {
|
| - Init(sink.Buffer(), sink.Length());
|
| -}
|
| -
|
| -bool SourceStream::Read(void* destination, size_t count) {
|
| - if (current_ + count > end_)
|
| - return false;
|
| - memcpy(destination, current_, count);
|
| - current_ += count;
|
| - return true;
|
| -}
|
| -
|
| -bool SourceStream::ReadVarint32(uint32* output_value) {
|
| - const uint8* after = Varint::Parse32WithLimit(current_, end_, output_value);
|
| - if (!after)
|
| - return false;
|
| - current_ = after;
|
| - return true;
|
| -}
|
| -
|
| -bool SourceStream::ReadVarint32Signed(int32* output_value) {
|
| - // Signed numbers are encoded as unsigned numbers so that numbers nearer zero
|
| - // have shorter varint encoding.
|
| - // 0000xxxx encoded as 000xxxx0.
|
| - // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx.
|
| - uint32 unsigned_value;
|
| - if (!ReadVarint32(&unsigned_value))
|
| - return false;
|
| - if (unsigned_value & 1)
|
| - *output_value = ~static_cast<int32>(unsigned_value >> 1);
|
| - else
|
| - *output_value = (unsigned_value >> 1);
|
| - return true;
|
| -}
|
| -
|
| -bool SourceStream::ShareSubstream(size_t offset, size_t length,
|
| - SourceStream* substream) {
|
| - if (offset > Remaining())
|
| - return false;
|
| - if (length > Remaining() - offset)
|
| - return false;
|
| - substream->Init(current_ + offset, length);
|
| - return true;
|
| -}
|
| -
|
| -bool SourceStream::ReadSubstream(size_t length, SourceStream* substream) {
|
| - if (!ShareSubstream(0, length, substream))
|
| - return false;
|
| - current_ += length;
|
| - return true;
|
| -}
|
| -
|
| -bool SourceStream::Skip(size_t byte_count) {
|
| - if (current_ + byte_count > end_)
|
| - return false;
|
| - current_ += byte_count;
|
| - return true;
|
| -}
|
| -
|
| -void SinkStream::Write(const void* data, size_t byte_count) {
|
| - buffer_.append(static_cast<const char*>(data), byte_count);
|
| -}
|
| -
|
| -void SinkStream::WriteVarint32(uint32 value) {
|
| - uint8 buffer[Varint::kMax32];
|
| - uint8* end = Varint::Encode32(buffer, value);
|
| - Write(buffer, end - buffer);
|
| -}
|
| -
|
| -void SinkStream::WriteVarint32Signed(int32 value) {
|
| - // Encode signed numbers so that numbers nearer zero have shorter
|
| - // varint encoding.
|
| - // 0000xxxx encoded as 000xxxx0.
|
| - // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx.
|
| - if (value < 0)
|
| - WriteVarint32(~value * 2 + 1);
|
| - else
|
| - WriteVarint32(value * 2);
|
| -}
|
| -
|
| -void SinkStream::Append(SinkStream* other) {
|
| - Write(other->buffer_.c_str(), other->buffer_.size());
|
| - other->buffer_.clear();
|
| - other->buffer_.reserve(0); // Non-binding request to reduce storage.
|
| -}
|
| -
|
| -////////////////////////////////////////////////////////////////////////////////
|
| -
|
| -SourceStreamSet::SourceStreamSet()
|
| - : count_(kMaxStreams) {
|
| -}
|
| -
|
| -SourceStreamSet::~SourceStreamSet() {
|
| -}
|
| -
|
| -
|
| -// Initializes from |source|.
|
| -// The stream set for N streams is serialized as a header
|
| -// <version><N><length1><length2>...<lengthN>
|
| -// followed by the stream contents
|
| -// <bytes1><bytes2>...<bytesN>
|
| -//
|
| -bool SourceStreamSet::Init(const void* source, size_t byte_count) {
|
| - const uint8* start = static_cast<const uint8*>(source);
|
| - const uint8* end = start + byte_count;
|
| -
|
| - unsigned int version;
|
| - const uint8* finger = Varint::Parse32WithLimit(start, end, &version);
|
| - if (finger == NULL)
|
| - return false;
|
| - if (version != kStreamsSerializationFormatVersion)
|
| - return false;
|
| -
|
| - unsigned int count;
|
| - finger = Varint::Parse32WithLimit(finger, end, &count);
|
| - if (finger == NULL)
|
| - return false;
|
| - if (count > kMaxStreams)
|
| - return false;
|
| -
|
| - count_ = count;
|
| -
|
| - unsigned int lengths[kMaxStreams];
|
| - size_t accumulated_length = 0;
|
| -
|
| - for (size_t i = 0; i < count_; ++i) {
|
| - finger = Varint::Parse32WithLimit(finger, end, &lengths[i]);
|
| - if (finger == NULL)
|
| - return false;
|
| - accumulated_length += lengths[i];
|
| - }
|
| -
|
| - // Remaining bytes should add up to sum of lengths.
|
| - if (end - finger != accumulated_length)
|
| - return false;
|
| -
|
| - accumulated_length = finger - start;
|
| - for (size_t i = 0; i < count_; ++i) {
|
| - stream(i)->Init(start + accumulated_length, lengths[i]);
|
| - accumulated_length += lengths[i];
|
| - }
|
| -
|
| - return true;
|
| -}
|
| -
|
| -bool SourceStreamSet::Init(SourceStream* source) {
|
| - // TODO(sra): consume the rest of |source|.
|
| - return Init(source->Buffer(), source->Remaining());
|
| -}
|
| -
|
| -bool SourceStreamSet::ReadSet(SourceStreamSet* set) {
|
| - uint32 stream_count = 0;
|
| - SourceStream* control_stream = this->stream(0);
|
| - if (!control_stream->ReadVarint32(&stream_count))
|
| - return false;
|
| -
|
| - uint32 lengths[kMaxStreams] = {}; // i.e. all zero.
|
| -
|
| - for (size_t i = 0; i < stream_count; ++i) {
|
| - if (!control_stream->ReadVarint32(&lengths[i]))
|
| - return false;
|
| - }
|
| -
|
| - for (size_t i = 0; i < stream_count; ++i) {
|
| - if (!this->stream(i)->ReadSubstream(lengths[i], set->stream(i)))
|
| - return false;
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -bool SourceStreamSet::Empty() const {
|
| - for (size_t i = 0; i < count_; ++i) {
|
| - if (streams_[i].Remaining() != 0)
|
| - return false;
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -////////////////////////////////////////////////////////////////////////////////
|
| -
|
| -SinkStreamSet::SinkStreamSet()
|
| - : count_(kMaxStreams) {
|
| -}
|
| -
|
| -SinkStreamSet::~SinkStreamSet() {
|
| -}
|
| -
|
| -void SinkStreamSet::Init(size_t stream_index_limit) {
|
| - count_ = stream_index_limit;
|
| -}
|
| -
|
| -// The header for a stream set for N streams is serialized as
|
| -// <version><N><length1><length2>...<lengthN>
|
| -void SinkStreamSet::CopyHeaderTo(SinkStream* header) {
|
| - header->WriteVarint32(kStreamsSerializationFormatVersion);
|
| - header->WriteVarint32(count_);
|
| - for (size_t i = 0; i < count_; ++i) {
|
| - header->WriteVarint32(stream(i)->Length());
|
| - }
|
| -}
|
| -
|
| -// Writes |this| to |combined_stream|. See SourceStreamSet::Init for the layout
|
| -// of the stream metadata and contents.
|
| -bool SinkStreamSet::CopyTo(SinkStream *combined_stream) {
|
| - SinkStream header;
|
| - CopyHeaderTo(&header);
|
| - combined_stream->Append(&header);
|
| - for (size_t i = 0; i < count_; ++i) {
|
| - combined_stream->Append(stream(i));
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -namespace {
|
| -bool Write(int file_descriptor, SinkStream* sink) {
|
| - size_t length = sink->Length();
|
| - const void *buffer = sink->Buffer();
|
| - int bytes_written = _write(file_descriptor, buffer, length);
|
| - return bytes_written == length;
|
| -}
|
| -}
|
| -
|
| -bool SinkStreamSet::CopyToFileDescriptor(int file_descriptor) {
|
| - SinkStream header;
|
| - CopyHeaderTo(&header);
|
| - if (!Write(file_descriptor, &header))
|
| - return false;
|
| - for (size_t i = 0; i < count_; ++i) {
|
| - if (!Write(file_descriptor, stream(i)))
|
| - return false;
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -bool SinkStreamSet::WriteSet(SinkStreamSet* set) {
|
| - uint32 lengths[kMaxStreams];
|
| - // 'stream_count' includes all non-empty streams and all empty stream numbered
|
| - // lower than a non-empty stream.
|
| - size_t stream_count = 0;
|
| - for (size_t i = 0; i < kMaxStreams; ++i) {
|
| - SinkStream* stream = set->stream(i);
|
| - lengths[i] = stream->Length();
|
| - if (lengths[i] > 0)
|
| - stream_count = i + 1;
|
| - }
|
| -
|
| - SinkStream* control_stream = this->stream(0);
|
| - control_stream->WriteVarint32(stream_count);
|
| - for (size_t i = 0; i < stream_count; ++i) {
|
| - control_stream->WriteVarint32(lengths[i]);
|
| - }
|
| -
|
| - for (size_t i = 0; i < stream_count; ++i) {
|
| - this->stream(i)->Append(set->stream(i));
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -} // namespace
|
|
|