courgette/streams.cc - Issue 1543643002: Switch to standard integer types in courgette/.

Side by Side Diff: courgette/streams.cc

Issue 1543643002: Switch to standard integer types in courgette/. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: fix Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Streams classes.	5 // Streams classes.

6 //	6 //

7 // These memory-resident streams are used for serializing data into a sequential	7 // These memory-resident streams are used for serializing data into a sequential

8 // region of memory.	8 // region of memory.

9 //	9 //

10 // Streams are divided into SourceStreams for reading and SinkStreams for	10 // Streams are divided into SourceStreams for reading and SinkStreams for

11 // writing. Streams are aggregated into Sets which allows several streams to be	11 // writing. Streams are aggregated into Sets which allows several streams to be

12 // used at once. Example: we can write A1, B1, A2, B2 but achieve the memory	12 // used at once. Example: we can write A1, B1, A2, B2 but achieve the memory

13 // layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another.	13 // layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another.

14 //	14 //

15 // The aggregated streams are important to Courgette's compression efficiency,	15 // The aggregated streams are important to Courgette's compression efficiency,

16 // we use it to cluster similar kinds of data which helps to generate longer	16 // we use it to cluster similar kinds of data which helps to generate longer

17 // common subsequences and repeated sequences.	17 // common subsequences and repeated sequences.

18	18

19 #include "courgette/streams.h"	19 #include "courgette/streams.h"

20	20

21 #include <memory.h>	21 #include <memory.h>

	22 #include <stddef.h>

	23 #include <stdint.h>

22	24

23 #include "base/basictypes.h"

24 #include "base/logging.h"	25 #include "base/logging.h"

25	26

26 namespace courgette {	27 namespace courgette {

27	28

28 // Update this version number if the serialization format of a StreamSet	29 // Update this version number if the serialization format of a StreamSet

29 // changes.	30 // changes.

30 static const unsigned int kStreamsSerializationFormatVersion = 20090218;	31 static const unsigned int kStreamsSerializationFormatVersion = 20090218;

31	32

32 //	33 //

33 // This is a cut down Varint implementation, implementing only what we use for	34 // This is a cut down Varint implementation, implementing only what we use for

34 // streams.	35 // streams.

35 //	36 //

36 class Varint {	37 class Varint {

37 public:	38 public:

38 // Maximum lengths of varint encoding of uint32	39 // Maximum lengths of varint encoding of uint32_t

39 static const int kMax32 = 5;	40 static const int kMax32 = 5;

40	41

41 // Parses a Varint32 encoded value from \|source\| and stores it in \|output\|,	42 // Parses a Varint32 encoded value from \|source\| and stores it in \|output\|,

42 // and returns a pointer to the following byte. Returns NULL if a valid	43 // and returns a pointer to the following byte. Returns NULL if a valid

43 // varint value was not found before \|limit\|.	44 // varint value was not found before \|limit\|.

44 static const uint8* Parse32WithLimit(const uint8* source, const uint8* limit,	45 static const uint8_t* Parse32WithLimit(const uint8_t* source,

45 uint32* output);	46 const uint8_t* limit,

	47 uint32_t* output);

46	48

47 // Writes the Varint32 encoded representation of \|value\| to buffer	49 // Writes the Varint32 encoded representation of \|value\| to buffer

48 // \|destination\|. \|destination\| must have sufficient length to hold kMax32	50 // \|destination\|. \|destination\| must have sufficient length to hold kMax32

49 // bytes. Returns a pointer to the byte just past the last encoded byte.	51 // bytes. Returns a pointer to the byte just past the last encoded byte.

50 static uint8* Encode32(uint8* destination, uint32 value);	52 static uint8_t* Encode32(uint8_t* destination, uint32_t value);

51 };	53 };

52	54

53 // Parses a Varint32 encoded unsigned number from \|source\|. The Varint32	55 // Parses a Varint32 encoded unsigned number from \|source\|. The Varint32

54 // encoding is a little-endian sequence of bytes containing base-128 digits,	56 // encoding is a little-endian sequence of bytes containing base-128 digits,

55 // with the high order bit set to indicate if there are more digits.	57 // with the high order bit set to indicate if there are more digits.

56 //	58 //

57 // For each byte, we mask out the digit and 'or' it into the right place in the	59 // For each byte, we mask out the digit and 'or' it into the right place in the

58 // result.	60 // result.

59 //	61 //

60 // The digit loop is unrolled for performance. It usually exits after the first	62 // The digit loop is unrolled for performance. It usually exits after the first

61 // one or two digits.	63 // one or two digits.

62 const uint8* Varint::Parse32WithLimit(const uint8* source,	64 const uint8_t* Varint::Parse32WithLimit(const uint8_t* source,

63 const uint8* limit,	65 const uint8_t* limit,

64 uint32* output) {	66 uint32_t* output) {

65 uint32 digit, result;	67 uint32_t digit, result;

66 if (source >= limit)	68 if (source >= limit)

67 return NULL;	69 return NULL;

68 digit = *(source++);	70 digit = *(source++);

69 result = digit & 127;	71 result = digit & 127;

70 if (digit < 128) {	72 if (digit < 128) {

71 *output = result;	73 *output = result;

72 return source;	74 return source;

73 }	75 }

74	76

75 if (source >= limit)	77 if (source >= limit)

(...skipping 30 matching lines...) Expand all Loading...
106 if (digit < 128) {	108 if (digit < 128) {

107 *output = result;	109 *output = result;

108 return source;	110 return source;

109 }	111 }

110	112

111 return NULL; // Value is too long to be a Varint32.	113 return NULL; // Value is too long to be a Varint32.

112 }	114 }

113	115

114 // Write the base-128 digits in little-endian order. All except the last digit	116 // Write the base-128 digits in little-endian order. All except the last digit

115 // have the high bit set to indicate more digits.	117 // have the high bit set to indicate more digits.

116 inline uint8* Varint::Encode32(uint8* destination, uint32 value) {	118 inline uint8_t* Varint::Encode32(uint8_t* destination, uint32_t value) {

117 while (value >= 128) {	119 while (value >= 128) {

118 *(destination++) = static_cast<uint8>(value) \| 128;	120 *(destination++) = static_cast<uint8_t>(value) \| 128;

119 value = value >> 7;	121 value = value >> 7;

120 }	122 }

121 *(destination++) = static_cast<uint8>(value);	123 *(destination++) = static_cast<uint8_t>(value);

122 return destination;	124 return destination;

123 }	125 }

124	126

125 void SourceStream::Init(const SinkStream& sink) {	127 void SourceStream::Init(const SinkStream& sink) {

126 Init(sink.Buffer(), sink.Length());	128 Init(sink.Buffer(), sink.Length());

127 }	129 }

128	130

129 bool SourceStream::Read(void* destination, size_t count) {	131 bool SourceStream::Read(void* destination, size_t count) {

130 if (current_ + count > end_)	132 if (current_ + count > end_)

131 return false;	133 return false;

132 memcpy(destination, current_, count);	134 memcpy(destination, current_, count);

133 current_ += count;	135 current_ += count;

134 return true;	136 return true;

135 }	137 }

136	138

137 bool SourceStream::ReadVarint32(uint32* output_value) {	139 bool SourceStream::ReadVarint32(uint32_t* output_value) {

138 const uint8* after = Varint::Parse32WithLimit(current_, end_, output_value);	140 const uint8_t* after = Varint::Parse32WithLimit(current_, end_, output_value);

139 if (!after)	141 if (!after)

140 return false;	142 return false;

141 current_ = after;	143 current_ = after;

142 return true;	144 return true;

143 }	145 }

144	146

145 bool SourceStream::ReadVarint32Signed(int32* output_value) {	147 bool SourceStream::ReadVarint32Signed(int32_t* output_value) {

146 // Signed numbers are encoded as unsigned numbers so that numbers nearer zero	148 // Signed numbers are encoded as unsigned numbers so that numbers nearer zero

147 // have shorter varint encoding.	149 // have shorter varint encoding.

148 // 0000xxxx encoded as 000xxxx0.	150 // 0000xxxx encoded as 000xxxx0.

149 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx.	151 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx.

150 uint32 unsigned_value;	152 uint32_t unsigned_value;

151 if (!ReadVarint32(&unsigned_value))	153 if (!ReadVarint32(&unsigned_value))

152 return false;	154 return false;

153 if (unsigned_value & 1)	155 if (unsigned_value & 1)

154 *output_value = ~static_cast<int32>(unsigned_value >> 1);	156 *output_value = ~static_cast<int32_t>(unsigned_value >> 1);

155 else	157 else

156 *output_value = (unsigned_value >> 1);	158 *output_value = (unsigned_value >> 1);

157 return true;	159 return true;

158 }	160 }

159	161

160 bool SourceStream::ShareSubstream(size_t offset, size_t length,	162 bool SourceStream::ShareSubstream(size_t offset, size_t length,

161 SourceStream* substream) {	163 SourceStream* substream) {

162 if (offset > Remaining())	164 if (offset > Remaining())

163 return false;	165 return false;

164 if (length > Remaining() - offset)	166 if (length > Remaining() - offset)

(...skipping 13 matching lines...) Expand all Loading...
178 if (current_ + byte_count > end_)	180 if (current_ + byte_count > end_)

179 return false;	181 return false;

180 current_ += byte_count;	182 current_ += byte_count;

181 return true;	183 return true;

182 }	184 }

183	185

184 CheckBool SinkStream::Write(const void* data, size_t byte_count) {	186 CheckBool SinkStream::Write(const void* data, size_t byte_count) {

185 return buffer_.append(static_cast<const char*>(data), byte_count);	187 return buffer_.append(static_cast<const char*>(data), byte_count);

186 }	188 }

187	189

188 CheckBool SinkStream::WriteVarint32(uint32 value) {	190 CheckBool SinkStream::WriteVarint32(uint32_t value) {

189 uint8 buffer[Varint::kMax32];	191 uint8_t buffer[Varint::kMax32];

190 uint8* end = Varint::Encode32(buffer, value);	192 uint8_t* end = Varint::Encode32(buffer, value);

191 return Write(buffer, end - buffer);	193 return Write(buffer, end - buffer);

192 }	194 }

193	195

194 CheckBool SinkStream::WriteVarint32Signed(int32 value) {	196 CheckBool SinkStream::WriteVarint32Signed(int32_t value) {

195 // Encode signed numbers so that numbers nearer zero have shorter	197 // Encode signed numbers so that numbers nearer zero have shorter

196 // varint encoding.	198 // varint encoding.

197 // 0000xxxx encoded as 000xxxx0.	199 // 0000xxxx encoded as 000xxxx0.

198 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx.	200 // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx.

199 bool ret;	201 bool ret;

200 if (value < 0)	202 if (value < 0)

201 ret = WriteVarint32(~value * 2 + 1);	203 ret = WriteVarint32(~value * 2 + 1);

202 else	204 else

203 ret = WriteVarint32(value * 2);	205 ret = WriteVarint32(value * 2);

204 return ret;	206 return ret;

205 }	207 }

206	208

207 CheckBool SinkStream::WriteSizeVarint32(size_t value) {	209 CheckBool SinkStream::WriteSizeVarint32(size_t value) {

208 uint32 narrowed_value = static_cast<uint32>(value);	210 uint32_t narrowed_value = static_cast<uint32_t>(value);

209 // On 32-bit, the compiler should figure out this test always fails.	211 // On 32-bit, the compiler should figure out this test always fails.

210 LOG_ASSERT(value == narrowed_value);	212 LOG_ASSERT(value == narrowed_value);

211 return WriteVarint32(narrowed_value);	213 return WriteVarint32(narrowed_value);

212 }	214 }

213	215

214 CheckBool SinkStream::Append(SinkStream* other) {	216 CheckBool SinkStream::Append(SinkStream* other) {

215 bool ret = Write(other->buffer_.data(), other->buffer_.size());	217 bool ret = Write(other->buffer_.data(), other->buffer_.size());

216 if (ret)	218 if (ret)

217 other->Retire();	219 other->Retire();

218 return ret;	220 return ret;

(...skipping 13 matching lines...) Expand all Loading...
232 }	234 }

233	235

234	236

235 // Initializes from \|source\|.	237 // Initializes from \|source\|.

236 // The stream set for N streams is serialized as a header	238 // The stream set for N streams is serialized as a header

237 // <version><N><length1><length2>...<lengthN>	239 // <version><N><length1><length2>...<lengthN>

238 // followed by the stream contents	240 // followed by the stream contents

239 // <bytes1><bytes2>...<bytesN>	241 // <bytes1><bytes2>...<bytesN>

240 //	242 //

241 bool SourceStreamSet::Init(const void* source, size_t byte_count) {	243 bool SourceStreamSet::Init(const void* source, size_t byte_count) {

242 const uint8* start = static_cast<const uint8*>(source);	244 const uint8_t* start = static_cast<const uint8_t*>(source);

243 const uint8* end = start + byte_count;	245 const uint8_t* end = start + byte_count;

244	246

245 unsigned int version;	247 unsigned int version;

246 const uint8* finger = Varint::Parse32WithLimit(start, end, &version);	248 const uint8_t* finger = Varint::Parse32WithLimit(start, end, &version);

247 if (finger == NULL)	249 if (finger == NULL)

248 return false;	250 return false;

249 if (version != kStreamsSerializationFormatVersion)	251 if (version != kStreamsSerializationFormatVersion)

250 return false;	252 return false;

251	253

252 unsigned int count;	254 unsigned int count;

253 finger = Varint::Parse32WithLimit(finger, end, &count);	255 finger = Varint::Parse32WithLimit(finger, end, &count);

254 if (finger == NULL)	256 if (finger == NULL)

255 return false;	257 return false;

256 if (count > kMaxStreams)	258 if (count > kMaxStreams)

(...skipping 23 matching lines...) Expand all Loading...
280	282

281 return true;	283 return true;

282 }	284 }

283	285

284 bool SourceStreamSet::Init(SourceStream* source) {	286 bool SourceStreamSet::Init(SourceStream* source) {

285 // TODO(sra): consume the rest of \|source\|.	287 // TODO(sra): consume the rest of \|source\|.

286 return Init(source->Buffer(), source->Remaining());	288 return Init(source->Buffer(), source->Remaining());

287 }	289 }

288	290

289 bool SourceStreamSet::ReadSet(SourceStreamSet* set) {	291 bool SourceStreamSet::ReadSet(SourceStreamSet* set) {

290 uint32 stream_count = 0;	292 uint32_t stream_count = 0;

291 SourceStream* control_stream = this->stream(0);	293 SourceStream* control_stream = this->stream(0);

292 if (!control_stream->ReadVarint32(&stream_count))	294 if (!control_stream->ReadVarint32(&stream_count))

293 return false;	295 return false;

294	296

295 uint32 lengths[kMaxStreams] = {}; // i.e. all zero.	297 uint32_t lengths[kMaxStreams] = {}; // i.e. all zero.

296	298

297 for (size_t i = 0; i < stream_count; ++i) {	299 for (size_t i = 0; i < stream_count; ++i) {

298 if (!control_stream->ReadVarint32(&lengths[i]))	300 if (!control_stream->ReadVarint32(&lengths[i]))

299 return false;	301 return false;

300 }	302 }

301	303

302 for (size_t i = 0; i < stream_count; ++i) {	304 for (size_t i = 0; i < stream_count; ++i) {

303 if (!this->stream(i)->ReadSubstream(lengths[i], set->stream(i)))	305 if (!this->stream(i)->ReadSubstream(lengths[i], set->stream(i)))

304 return false;	306 return false;

305 }	307 }

(...skipping 51 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
357 if (ret) {	359 if (ret) {

358 ret = combined_stream->Append(&header);	360 ret = combined_stream->Append(&header);

359 for (size_t i = 0; ret && i < count_; ++i) {	361 for (size_t i = 0; ret && i < count_; ++i) {

360 ret = combined_stream->Append(stream(i));	362 ret = combined_stream->Append(stream(i));

361 }	363 }

362 }	364 }

363 return ret;	365 return ret;

364 }	366 }

365	367

366 CheckBool SinkStreamSet::WriteSet(SinkStreamSet* set) {	368 CheckBool SinkStreamSet::WriteSet(SinkStreamSet* set) {

367 uint32 lengths[kMaxStreams];	369 uint32_t lengths[kMaxStreams];

368 // 'stream_count' includes all non-empty streams and all empty stream numbered	370 // 'stream_count' includes all non-empty streams and all empty stream numbered

369 // lower than a non-empty stream.	371 // lower than a non-empty stream.

370 size_t stream_count = 0;	372 size_t stream_count = 0;

371 for (size_t i = 0; i < kMaxStreams; ++i) {	373 for (size_t i = 0; i < kMaxStreams; ++i) {

372 SinkStream* stream = set->stream(i);	374 SinkStream* stream = set->stream(i);

373 lengths[i] = static_cast<uint32>(stream->Length());	375 lengths[i] = static_cast<uint32_t>(stream->Length());

374 if (lengths[i] > 0)	376 if (lengths[i] > 0)

375 stream_count = i + 1;	377 stream_count = i + 1;

376 }	378 }

377	379

378 SinkStream* control_stream = this->stream(0);	380 SinkStream* control_stream = this->stream(0);

379 bool ret = control_stream->WriteSizeVarint32(stream_count);	381 bool ret = control_stream->WriteSizeVarint32(stream_count);

380 for (size_t i = 0; ret && i < stream_count; ++i) {	382 for (size_t i = 0; ret && i < stream_count; ++i) {

381 ret = control_stream->WriteSizeVarint32(lengths[i]);	383 ret = control_stream->WriteSizeVarint32(lengths[i]);

382 }	384 }

383	385

384 for (size_t i = 0; ret && i < stream_count; ++i) {	386 for (size_t i = 0; ret && i < stream_count; ++i) {

385 ret = this->stream(i)->Append(set->stream(i));	387 ret = this->stream(i)->Append(set->stream(i));

386 }	388 }

387 return ret;	389 return ret;

388 }	390 }

389	391

390 } // namespace	392 } // namespace

OLD	NEW

« no previous file with comments | « courgette/streams.h ('k') | courgette/streams_unittest.cc » ('j') | no next file with comments »