src/interpreter/source-position-table.cc - Issue 1704943002: Encode interpreter::SourcePositionTable as variable-length ints.

Side by Side Diff: src/interpreter/source-position-table.cc

Issue 1704943002: Encode interpreter::SourcePositionTable as variable-length ints. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Fix iterator. Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/interpreter/source-position-table.h"	5 #include "src/interpreter/source-position-table.h"

6	6

7 #include "src/assembler.h"

8 #include "src/objects-inl.h"	7 #include "src/objects-inl.h"

9 #include "src/objects.h"	8 #include "src/objects.h"

10	9

11 namespace v8 {	10 namespace v8 {

12 namespace internal {	11 namespace internal {

13 namespace interpreter {	12 namespace interpreter {

14	13

15 class IsStatementField : public BitField<bool, 0, 1> {};	14 // We'll use a simple encoding scheme to record the source positions.

16 class SourcePositionField : public BitField<int, 1, 30> {};	15 // Conceptually, each position consists of:

	16 // - bytecode_offset: An integer index into the BytecodeArray

	17 // - source_position: An integer index into the source string.

	18 // - position type: Each position is either a statement or an expression.

	19 //

	20 // The basic idea for the encoding is to use a variable-length integer coding,

	21 // where each byte contains 7 bits of payload data, and 1 'more' bit that

	22 // determines whether additional bytes follow. Additionally:

	23 // - we record the difference from the previous position,

	24 // - we just stuff one bit for the type into the bytecode offset,

	25 // - negative numbers occur only rarely, so we use a denormalized lead byte

	26 // (a lead byte with all zeros, which normally wouldn't make any sense)

	27 // to encode a negative sign, so that we 'pay' nothing for positive numbers,

	28 // but have to pay a full byte for negative integers.

	29

	30 namespace {

	31 // We'll use the BitField<..> template to handle the actual encoding.

	32 // Most bytes will be a MoreBit w/ ValueBits. If we also want to encode

	33 // the type, we'll have a byte with MoreBit, TypeBit, and TypeValueBits,

	34 // followed by zero or more 'normal' bytes.

	35 class MoreBit : public BitField8<bool, 7, 1> {};

	36 class ValueBits : public BitField8<int, 0, 7> {};

	37 class TypeBit : public BitField8<bool, 6, 1> {};
	Yang 2016/02/17 19:54:51 Since we always stuff the type bit into the byteco Since we always stuff the type bit into the bytecode offset, wouldn't it be more straight-forward to have MoreBit, SourcePositionBits, TypeBit, and CodeOffsetBits? vogelheim 2016/02/18 13:06:09 I disagree. Only the first byte is different; all Show quoted text On 2016/02/17 19:54:51, Yang wrote: > Since we always stuff the type bit into the bytecode offset, wouldn't it be more > straight-forward to have MoreBit, SourcePositionBits, TypeBit, and > CodeOffsetBits? I disagree. Only the first byte is different; all subsequent bytes are always MoreBit|ValueBits. I.e., the data format is: [MoreBit|TypeBit|TypeValueBits] [MoreBit|ValueBits]* or: [MoreBit|ValueBits]* So the value bits really are the bits for all of the values. Yang 2016/02/19 08:28:02 I see. Thanks for the explanation. Show quoted text On 2016/02/18 13:06:09, vogelheim wrote: > On 2016/02/17 19:54:51, Yang wrote: > > Since we always stuff the type bit into the bytecode offset, wouldn't it be > more > > straight-forward to have MoreBit, SourcePositionBits, TypeBit, and > > CodeOffsetBits? > > I disagree. Only the first byte is different; all subsequent bytes are always > MoreBit|ValueBits. I.e., the data format is: > > [MoreBit|TypeBit|TypeValueBits] [MoreBit|ValueBits]* > or: [MoreBit|ValueBits]* > > So the value bits really are the bits for all of the values. I see. Thanks for the explanation.
	38 class TypeValueBits : public BitField8<int, 0, 6> {};

	39

	40 // Helper: Number of bytes for given value in 'value' bytes encoding.

	41 int SizeOfValue(int value) {

	42 DCHECK(value >= 0);

	43 int size = 1;

	44 while (value >= (1 << size * ValueBits::kSize)) size++;
	Yang 2016/02/17 19:54:51 Let's use brackets around the multiplication. Let's use brackets around the multiplication. vogelheim 2016/02/18 13:06:09 Done. Show quoted text On 2016/02/17 19:54:51, Yang wrote: > Let's use brackets around the multiplication. Done.
	45 return size;

	46 }

	47

	48 // Helper: Number of bytes for given value (in 'type + value') bytes encoding.

	49 int SizeOfTypeValue(int value) {

	50 DCHECK(value >= 0);

	51 int size = 1;

	52 while (value >= (1 << (TypeValueBits::kSize + (size - 1) * ValueBits::kSize)))

	53 size++;

	54 return size;

	55 }

	56

	57 // Helper: Encode one 'normal' byte.

	58 void EncodeByte(ZoneVector<byte>& bytes, bool more, int value) {

	59 bytes.push_back(MoreBit::encode(more) |

	60 ValueBits::encode(value & ValueBits::kMax));

	61 }

	62

	63 // Helper: Encode a byte w/ a flag value.
	Yang 2016/02/17 19:54:51 Can we call this 'type' instead of 'flag' to be co Can we call this 'type' instead of 'flag' to be consistent? vogelheim 2016/02/18 13:06:09 Done. Show quoted text On 2016/02/17 19:54:51, Yang wrote: > Can we call this 'type' instead of 'flag' to be consistent? Done.
	64 void EncodeFlagByte(ZoneVector<byte>& bytes, bool more, bool flag, int value) {

	65 bytes.push_back(MoreBit::encode(more) | TypeBit::encode(flag) |

	66 TypeValueBits::encode(value & TypeValueBits::kMax));

	67 }

	68

	69 // Helper: Encode a positive integer w/ the given size.

	70 void EncodeBytes(ZoneVector<byte>& bytes, int value, int size) {

	71 DCHECK(value >= 0);

	72 for (int i = 0; i < size; i++) {

	73 int byte_no = size - i - 1;

	74 EncodeByte(bytes, byte_no != 0, value >> (ValueBits::kSize * byte_no));

	75 }

	76 }

	77

	78 // Encode an integer.

	79 void EncodeValue(ZoneVector<byte>& bytes, int value) {

	80 if (value < 0) {

	81 EncodeByte(bytes, true, 0);

	82 value = -value;
	Yang 2016/02/17 19:54:51 Let's assert that value is not kIntMin. Or else th Let's assert that value is not kIntMin. Or else this could get ugly. vogelheim 2016/02/18 13:06:09 Done. Show quoted text On 2016/02/17 19:54:51, Yang wrote: > Let's assert that value is not kIntMin. Or else this could get ugly. Done.
	83 }

	84 EncodeBytes(bytes, value, SizeOfValue(value));

	85 }

	86

	87 // Encode an integer and a flag.

	88 void EncodeFlagValue(ZoneVector<byte>& bytes, bool flag, int value) {

	89 if (value < 0) {

	90 EncodeFlagByte(bytes, true, flag, 0);

	91 EncodeFlagValue(bytes, false, -value);

	92 } else {

	93 int size = SizeOfTypeValue(value);

	94 EncodeFlagByte(bytes, (size > 1), flag,

	95 value >> (size - 1) * ValueBits::kSize);

	96 EncodeBytes(bytes, value, size - 1);

	97 }

	98 }

	99

	100 // Helper: Decode a series of 'value' bytes. Assume the current byte has

	101 // been decoded, but use it to decide whether we need more bytes.

	102 void DecodeBytes(const byte* bytes, int* index, int& value) {

	103 byte current = bytes[(*index)++];

	104 while (MoreBit::decode(current)) {

	105 current = bytes[(*index)++];

	106 value = (value << ValueBits::kSize) + ValueBits::decode(current);

	107 }

	108 }

	109

	110 // Decode an integer at bytes.get(index). Inverse of EncodeValue.

	111 int DecodeValue(const byte* bytes, int* index) {

	112 byte current = bytes[*index];

	113 int val = ValueBits::decode(current);

	114 bool sign = (val == 0);

	115 DecodeBytes(bytes, index, val);

	116 return sign ? -val : val;

	117 }

	118

	119 // Decode an integer and a flag at bytes.get(index). Inverse of

	120 // EncodeFlagValue.

	121 int DecodeFlagValue(const byte* bytes, int* index, bool* flag) {

	122 int old_index = *index;

	123 byte current = bytes[*index];

	124 *flag = TypeBit::decode(current);

	125 int val = TypeValueBits::decode(current);

	126 DecodeBytes(bytes, index, val);

	127 bool sign = (*index - old_index) > SizeOfTypeValue(val);

	128 return sign ? -val : val;

	129 }

	130

	131 // Helper: Scan one value backwards.

	132 // *index is expected to point behind the current 'complete' value encoding

	133 // and will be set to point behind the previous one.

	134 void EncodeScanBackwards(ZoneVector<byte>& bytes, int* index) {

	135 DCHECK(bytes.size() > 0);

	136 DCHECK(*index > 0);

	137

	138 // Index is expected to point behind a 'complete' value encoding.

	139 (*index)--;

	140 DCHECK(!MoreBit::decode(bytes[*index]));

	141

	142 // Scan back until we either hit the beginning of our bytes, or another byte

	143 // without 'more' bit.

	144 while (*index > 0 && MoreBit::decode(bytes[(*index) - 1])) {

	145 (*index)--;

	146 }

	147 DCHECK(*index >= 0);

	148 }

	149

	150 } // namespace

	151

	152 // SourcePositionTableCodec
	Yang 2016/02/17 19:54:51 I don't really see the value of having a standalon I don't really see the value of having a standalone class for encoding or decoding. The only thing encoding and decoding shares are the previous_* fields, but otherwise do not share any code. I'd simply move them into the builder and iterator classes. vogelheim 2016/02/18 13:06:09 Done. Show quoted text On 2016/02/17 19:54:51, Yang wrote: > I don't really see the value of having a standalone class for encoding or > decoding. The only thing encoding and decoding shares are the previous_* fields, > but otherwise do not share any code. I'd simply move them into the builder and > iterator classes. Done.
	153

	154 SourcePositionTableCodec::SourcePositionTableCodec()

	155 : previous_bytecode_offset_(0), previous_source_position_(0) {}

	156

	157 SourcePositionTableCodec::~SourcePositionTableCodec() {}

	158

	159 void SourcePositionTableCodec::Encode(ZoneVector<byte>& bytes,

	160 PositionTableEntry entry) {

	161 EncodeFlagValue(bytes, entry.is_statement,

	162 entry.bytecode_offset - previous_bytecode_offset_);

	163 EncodeValue(bytes, entry.source_position - previous_source_position_);

	164

	165 previous_bytecode_offset_ = entry.bytecode_offset;

	166 previous_source_position_ = entry.source_position;

	167 }

	168

	169 void SourcePositionTableCodec::EncodeRevertPosition(ZoneVector<byte>& bytes,

	170 int bytecode_offset) {

	171 if (bytes.size() == 0 || bytecode_offset != previous_bytecode_offset_) return;

	172

	173 // Reverting means we need to discard 2 variable length ints. We also need

	174 // to read them, in order to fixup the previous_*_ member variables.

	175

	176 // Scan backwards for 2 values.

	177 int index = static_cast<int>(bytes.size());

	178 EncodeScanBackwards(bytes, &index);

	179 EncodeScanBackwards(bytes, &index);

	180

	181 // Read from index to adjust previous_* values, but throw away all other

	182 // info.

	183 {

	184 int tmp_index = index;

	185 bool tmp_flag;

	186 previous_bytecode_offset_ -=

	187 DecodeFlagValue(&*bytes.begin(), &tmp_index, &tmp_flag);

	188 previous_source_position_ -= DecodeValue(&*bytes.begin(), &tmp_index);

	189 }

	190

	191 bytes.resize(index);

	192 }

	193

	194 void SourcePositionTableCodec::Decode(const byte* bytes, int* index,

	195 PositionTableEntry* entry) {

	196 previous_bytecode_offset_ +=

	197 DecodeFlagValue(bytes, index, &(entry->is_statement));

	198 previous_source_position_ += DecodeValue(bytes, index);

	199

	200 entry->bytecode_offset = previous_bytecode_offset_;

	201 entry->source_position = previous_source_position_;

	202 }

	203

	204 // SourcePositionTableBuilder

17	205

18 void SourcePositionTableBuilder::AddStatementPosition(size_t bytecode_offset,	206 void SourcePositionTableBuilder::AddStatementPosition(size_t bytecode_offset,

19 int source_position) {	207 int source_position) {

20 int offset = static_cast<int>(bytecode_offset);	208 AddEntry({static_cast<int>(bytecode_offset), source_position, true});

21 // If a position has already been assigned to this bytecode offset,

22 // do not reassign a new statement position.

23 if (CodeOffsetHasPosition(offset)) return;

24 uint32_t encoded = IsStatementField::encode(true) |

25 SourcePositionField::encode(source_position);

26 entries_.push_back({offset, encoded});

27 }	209 }

28	210

29 void SourcePositionTableBuilder::AddExpressionPosition(size_t bytecode_offset,	211 void SourcePositionTableBuilder::AddExpressionPosition(size_t bytecode_offset,

30 int source_position) {	212 int source_position) {

31 int offset = static_cast<int>(bytecode_offset);	213 AddEntry({static_cast<int>(bytecode_offset), source_position, false});

32 // If a position has already been assigned to this bytecode offset,	214 }

33 // do not reassign a new statement position.	215

34 if (CodeOffsetHasPosition(offset)) return;	216 void SourcePositionTableBuilder::AddEntry(const PositionTableEntry& entry) {

35 uint32_t encoded = IsStatementField::encode(false) |	217 if (!CodeOffsetHasPosition(entry.bytecode_offset)) {

36 SourcePositionField::encode(source_position);	218 codec_.Encode(bytes_, entry);

37 entries_.push_back({offset, encoded});	219 #ifdef ENABLE_SLOW_DCHECKS

	220 raw_entries_.push_back(entry);

	221 #endif

	222 }

38 }	223 }

39	224

40 void SourcePositionTableBuilder::RevertPosition(size_t bytecode_offset) {	225 void SourcePositionTableBuilder::RevertPosition(size_t bytecode_offset) {

41 int offset = static_cast<int>(bytecode_offset);	226 int offset = static_cast<int>(bytecode_offset);

42 // If we already added a source position table entry, but the bytecode array	227 // If we already added a source position table entry, but the bytecode array

43 // builder ended up not outputting a bytecode for the corresponding bytecode	228 // builder ended up not outputting a bytecode for the corresponding bytecode

44 // offset, we have to remove that entry.	229 // offset, we have to remove that entry.

45 if (CodeOffsetHasPosition(offset)) entries_.pop_back();	230 if (CodeOffsetHasPosition(offset)) {

	231 codec_.EncodeRevertPosition(bytes_, offset);

	232 #ifdef ENABLE_SLOW_DCHECKS

	233 raw_entries_.pop_back();

	234 #endif

	235 }

46 }	236 }

47	237

48 Handle<FixedArray> SourcePositionTableBuilder::ToFixedArray() {	238 Handle<ByteArray> SourcePositionTableBuilder::ToSourcePositionTable() {

49 int length = static_cast<int>(entries_.size());	239 Handle<ByteArray> table = isolate_->factory()->NewByteArray(

50 Handle<FixedArray> table =	240 static_cast<int>(bytes_.size()), TENURED);

51 isolate_->factory()->NewFixedArray(length * 2, TENURED);	241 if (bytes_.empty()) return table;

52 for (int i = 0; i < length; i++) {	242

53 table->set(i * 2, Smi::FromInt(entries_[i].bytecode_offset));	243 MemCopy(table->GetDataStartAddress(), &*bytes_.begin(), bytes_.size());

54 table->set(i * 2 + 1, Smi::FromInt(entries_[i].source_position_and_type));	244

	245 #ifdef ENABLE_SLOW_DCHECKS

	246 // Brute force testing: Record all positions and decode

	247 // the entire table to verify they are identical.

	248 SourcePositionTableIterator encoded(*table);

	249 auto raw = raw_entries_.begin();

	250 for (; !encoded.done(); encoded.Advance(), raw++) {

	251 DCHECK(raw != raw_entries_.end());

	252 DCHECK_EQ(encoded.bytecode_offset(), raw->bytecode_offset);

	253 DCHECK_EQ(encoded.source_position(), raw->source_position);

	254 DCHECK_EQ(encoded.is_statement(), raw->is_statement);

55 }	255 }

	256 DCHECK(raw == raw_entries_.end());

	257 #endif

	258

56 return table;	259 return table;

57 }	260 }

58	261

	262 // SourcePositionTableIterator

	263

59 SourcePositionTableIterator::SourcePositionTableIterator(	264 SourcePositionTableIterator::SourcePositionTableIterator(

60 BytecodeArray* bytecode_array)	265 BytecodeArray* bytecode_array)

61 : table_(bytecode_array->source_position_table()),	266 : SourcePositionTableIterator(bytecode_array->source_position_table()) {}

62 index_(0),	267

63 length_(table_->length()) {	268 SourcePositionTableIterator::SourcePositionTableIterator(ByteArray* byte_array)

64 DCHECK(table_->length() % 2 == 0);	269 : table_(byte_array), index_(0), current_({0, 0, false}) {

65 Advance();	270 Advance();

66 }	271 }

67	272

68 void SourcePositionTableIterator::Advance() {	273 void SourcePositionTableIterator::Advance() {

69 if (index_ < length_) {	274 DCHECK(!done());

70 int new_bytecode_offset = Smi::cast(table_->get(index_))->value();	275 DCHECK(index_ >= 0 && index_ <= table_->length());

71 // Bytecode offsets are in ascending order.	276 if (index_ == table_->length()) {

72 DCHECK(bytecode_offset_ < new_bytecode_offset || index_ == 0);	277 index_ = kDone;

73 bytecode_offset_ = new_bytecode_offset;	278 } else {

74 uint32_t source_position_and_type =	279 codec_.Decode(table_->GetDataStartAddress(), &index_, &current_);

75 static_cast<uint32_t>(Smi::cast(table_->get(index_ + 1))->value());

76 is_statement_ = IsStatementField::decode(source_position_and_type);

77 source_position_ = SourcePositionField::decode(source_position_and_type);

78 }	280 }

79 index_ += 2;

80 }	281 }

81	282

82 } // namespace interpreter	283 } // namespace interpreter

83 } // namespace internal	284 } // namespace internal

84 } // namespace v8	285 } // namespace v8

OLD	NEW

« src/interpreter/source-position-table.h ('K') | « src/interpreter/source-position-table.h ('k') | src/objects.h » ('j') | test/unittests/interpreter/source-position-table-unittest.cc » ('J')