src/interpreter/source-position-table.cc - Issue 1704943002: Encode interpreter::SourcePositionTable as variable-length ints.

Side by Side Diff: src/interpreter/source-position-table.cc

Issue 1704943002: Encode interpreter::SourcePositionTable as variable-length ints. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Address Yang's comments. Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/interpreter/source-position-table.h"	5 #include "src/interpreter/source-position-table.h"

6	6

7 #include "src/assembler.h"

8 #include "src/objects-inl.h"	7 #include "src/objects-inl.h"

9 #include "src/objects.h"	8 #include "src/objects.h"

10	9

11 namespace v8 {	10 namespace v8 {

12 namespace internal {	11 namespace internal {

13 namespace interpreter {	12 namespace interpreter {

14	13

15 class IsStatementField : public BitField<bool, 0, 1> {};	14 // We'll use a simple encoding scheme to record the source positions.

16 class SourcePositionField : public BitField<int, 1, 30> {};	15 // Conceptually, each position consists of:

	16 // - bytecode_offset: An integer index into the BytecodeArray

	17 // - source_position: An integer index into the source string.

	18 // - position type: Each position is either a statement or an expression.

	19 //

	20 // The basic idea for the encoding is to use a variable-length integer coding,

	21 // where each byte contains 7 bits of payload data, and 1 'more' bit that

	22 // determines whether additional bytes follow. Additionally:

	23 // - we record the difference from the previous position,

	24 // - we just stuff one bit for the type into the bytecode offset,

	25 // - negative numbers occur only rarely, so we use a denormalized lead byte

	26 // (a lead byte with all zeros, which normally wouldn't make any sense)

	27 // to encode a negative sign, so that we 'pay' nothing for positive numbers,

	28 // but have to pay a full byte for negative integers.

	29

	30 namespace {

	31 // We'll use the BitField<..> template to handle the actual encoding.

	32 // Most bytes will be a MoreBit w/ ValueBits. If we also want to encode

	33 // the type, we'll have a byte with MoreBit, TypeBit, and TypeValueBits,

	34 // followed by zero or more 'normal' bytes.

	35 class MoreBit : public BitField8<bool, 7, 1> {};

	36 class ValueBits : public BitField8<int, 0, 7> {};

	37 class TypeBit : public BitField8<bool, 6, 1> {};

	38 class TypeValueBits : public BitField8<int, 0, 6> {};

	39

	40 // Helper: Number of bytes for given value in 'value' bytes encoding.

	41 int SizeOfValue(int value) {

	42 DCHECK(value >= 0);

	43 int size = 1;

	44 while (value >= (1 << (size * ValueBits::kSize))) size++;

	45 return size;

	46 }

	47

	48 // Helper: Number of bytes for given value (in 'type + value') bytes encoding.

	49 int SizeOfTypeValue(int value) {

	50 DCHECK(value >= 0);

	51 int size = 1;

	52 while (value >= (1 << (TypeValueBits::kSize + (size - 1) * ValueBits::kSize)))

	53 size++;

	54 return size;

	55 }

	56

	57 // Helper: Encode one 'normal' byte.

	58 void EncodeByte(ZoneVector<byte>& bytes, bool more, int value) {

	59 bytes.push_back(MoreBit::encode(more) \|
	rmcilroy 2016/02/18 14:29:53 DCHECK value is less than 7 bits? DCHECK value is less than 7 bits?
	60 ValueBits::encode(value & ValueBits::kMax));

	61 }

	62

	63 // Helper: Encode a byte w/ a type value.

	64 void EncodeTypeByte(ZoneVector<byte>& bytes, bool more, bool type, int value) {

	65 bytes.push_back(MoreBit::encode(more) \| TypeBit::encode(type) \|
	rmcilroy 2016/02/18 14:29:53 Ditto, but 6 bits? Ditto, but 6 bits?
	66 TypeValueBits::encode(value & TypeValueBits::kMax));

	67 }

	68

	69 // Helper: Encode a positive integer w/ the given size.

	70 void EncodeBytes(ZoneVector<byte>& bytes, int value, int size) {

	71 DCHECK(value >= 0);

	72 for (int i = 0; i < size; i++) {

	73 int byte_no = size - i - 1;

	74 EncodeByte(bytes, byte_no != 0, value >> (ValueBits::kSize * byte_no));

	75 }

	76 }

	77

	78 // Encode an integer.

	79 void EncodeValue(ZoneVector<byte>& bytes, int value) {

	80 if (value < 0) {

	81 DCHECK(value != INT_MIN);

	82 EncodeByte(bytes, true, 0);

	83 value = -value;

	84 }

	85 EncodeBytes(bytes, value, SizeOfValue(value));

	86 }

	87

	88 // Encode an integer and a type bit.

	89 void EncodeTypeValue(ZoneVector<byte>& bytes, bool type, int value) {

	90 if (value < 0) {

	91 DCHECK(value != INT_MIN);

	92 EncodeTypeByte(bytes, true, type, 0);

	93 EncodeTypeValue(bytes, false, -value);

	94 } else {

	95 int size = SizeOfTypeValue(value);

	96 EncodeTypeByte(bytes, (size > 1), type,

	97 value >> (size - 1) * ValueBits::kSize);

	98 EncodeBytes(bytes, value, size - 1);
	rmcilroy 2016/02/18 14:29:53 This is a bit magic, i.e., you are relying on the This is a bit magic, i.e., you are relying on the fact that size is less than what is required for EncodeBytes of this value and that it will therefore only encode the bottom bytes of the value. I think this would all be a lot simpler if you did the separation between TypeValues and Values at a higher level, and just feed the encoding helpers raw ints which it encodes uniformly with the variable byte compression scheme. To do this, you might want to have a typed value have value first, and be shifted up by one with the type bit in the LSB of the value (to keep the values small for this compression scheme). WDYT? vogelheim 2016/02/18 17:11:19 Not sure... My intent was this: The methods call Show quoted text On 2016/02/18 14:29:53, rmcilroy wrote: > This is a bit magic, i.e., you are relying on the fact that size is less than > what is required for EncodeBytes of this value and that it will therefore only > encode the bottom bytes of the value. I think this would all be a lot simpler if > you did the separation between TypeValues and Values at a higher level, and just > feed the encoding helpers raw ints which it encodes uniformly with the variable > byte compression scheme. To do this, you might want to have a typed value have > value first, and be shifted up by one with the type bit in the LSB of the value > (to keep the values small for this compression scheme). WDYT? Not sure... My intent was this: The methods called EncodeValue (also DecodeValue) have a sane interface and will safely encode the value you put in. However, if I want to avoid code duplication, I also need some low level helper functions that aren't safe, and that will rely on their parameters being just right. Those are the EncodeByte versions. The anonymous namespace provides some degree of protection against mis-use. EncodeBytes was originally called EncodeRemainder, which might make its purpose a bit more clear. 
I think I'll just implement an alternative as you suggest, and see if I like it better...
	99 }

	100 }

	101

	102 // Helper: Decode a series of 'value' bytes. Assume the current byte has

	103 // been decoded, but use it to decide whether we need more bytes.

	104 void DecodeBytes(const byte* bytes, int* index, int& value) {

	105 byte current = bytes[(*index)++];

	106 while (MoreBit::decode(current)) {

	107 current = bytes[(*index)++];

	108 value = (value << ValueBits::kSize) + ValueBits::decode(current);

	109 }

	110 }

	111

	112 // Decode an integer at bytes.get(index). Inverse of EncodeValue.

	113 int DecodeValue(const byte* bytes, int* index) {

	114 byte current = bytes[*index];

	115 int val = ValueBits::decode(current);

	116 bool sign = (val == 0);

	117 DecodeBytes(bytes, index, val);

	118 return sign ? -val : val;

	119 }

	120

	121 // Decode an integer and a type bit at bytes.get(index). Inverse of

	122 // EncodeTypeValue.

	123 int DecodeTypeValue(const byte* bytes, int* index, bool* type) {

	124 int old_index = *index;

	125 byte current = bytes[*index];

	126 *type = TypeBit::decode(current);

	127 int val = TypeValueBits::decode(current);

	128 DecodeBytes(bytes, index, val);

	129 bool sign = (*index - old_index) > SizeOfTypeValue(val);

	130 return sign ? -val : val;

	131 }

	132

	133 // Helper: Scan one value backwards.

	134 // *index is expected to point behind the current 'complete' value encoding

	135 // and will be set to point behind the previous one.

	136 void EncodeScanBackwards(ZoneVector<byte>& bytes, int* index) {

	137 DCHECK(bytes.size() > 0);

	138 DCHECK(*index > 0);

	139

	140 // Index is expected to point behind a 'complete' value encoding.

	141 (*index)--;

	142 DCHECK(!MoreBit::decode(bytes[*index]));

	143

	144 // Scan back until we either hit the beginning of our bytes, or another byte

	145 // without 'more' bit.

	146 while (*index > 0 && MoreBit::decode(bytes[(*index) - 1])) {

	147 (*index)--;

	148 }

	149 DCHECK(*index >= 0);

	150 }

	151

	152 } // namespace

	153

	154 // SourcePositionTableBuilder

17	155

18 void SourcePositionTableBuilder::AddStatementPosition(size_t bytecode_offset,	156 void SourcePositionTableBuilder::AddStatementPosition(size_t bytecode_offset,

19 int source_position) {	157 int source_position) {

20 int offset = static_cast<int>(bytecode_offset);	158 AddEntry({static_cast<int>(bytecode_offset), source_position, true});

21 // If a position has already been assigned to this bytecode offset,

22 // do not reassign a new statement position.

23 if (CodeOffsetHasPosition(offset)) return;

24 uint32_t encoded = IsStatementField::encode(true) \|

25 SourcePositionField::encode(source_position);

26 entries_.push_back({offset, encoded});

27 }	159 }

28	160

29 void SourcePositionTableBuilder::AddExpressionPosition(size_t bytecode_offset,	161 void SourcePositionTableBuilder::AddExpressionPosition(size_t bytecode_offset,

30 int source_position) {	162 int source_position) {

31 int offset = static_cast<int>(bytecode_offset);	163 AddEntry({static_cast<int>(bytecode_offset), source_position, false});

32 // If a position has already been assigned to this bytecode offset,	164 }

33 // do not reassign a new statement position.	165

34 if (CodeOffsetHasPosition(offset)) return;	166 void SourcePositionTableBuilder::AddEntry(const PositionTableEntry& entry) {

35 uint32_t encoded = IsStatementField::encode(false) \|	167 if (!CodeOffsetHasPosition(entry.bytecode_offset)) {

36 SourcePositionField::encode(source_position);	168 EncodeTypeValue(bytes_, entry.is_statement,

37 entries_.push_back({offset, encoded});	169 entry.bytecode_offset - previous_bytecode_offset_);

	170 EncodeValue(bytes_, entry.source_position - previous_source_position_);

	171

	172 previous_bytecode_offset_ = entry.bytecode_offset;

	173 previous_source_position_ = entry.source_position;

	174

	175 #ifdef ENABLE_SLOW_DCHECKS

	176 raw_entries_.push_back(entry);

	177 #endif

	178 }

38 }	179 }

39	180

40 void SourcePositionTableBuilder::RevertPosition(size_t bytecode_offset) {	181 void SourcePositionTableBuilder::RevertPosition(size_t bytecode_offset) {

41 int offset = static_cast<int>(bytecode_offset);	182 int offset = static_cast<int>(bytecode_offset);

42 // If we already added a source position table entry, but the bytecode array	183 // If we already added a source position table entry, but the bytecode array

43 // builder ended up not outputting a bytecode for the corresponding bytecode	184 // builder ended up not outputting a bytecode for the corresponding bytecode

44 // offset, we have to remove that entry.	185 // offset, we have to remove that entry.

45 if (CodeOffsetHasPosition(offset)) entries_.pop_back();	186 if (CodeOffsetHasPosition(offset)) {

	187 // Reverting means we need to discard 2 variable length ints. We also need
	rmcilroy 2016/02/18 14:29:53 Hmm, I now like this RevertPosition function even Hmm, I now like this RevertPosition function even less than when it was originally added :/. Could we remove it and instead have the bytecode-array-builder keep track of the last source / expression position and only add it to the source position table if a bytecode gets emitted (i.e., throw it away if exit_seen_in_block_ where we currently call RevertPosition)?
	188 // to read them, in order to fixup the previous_*_ member variables.

	189

	190 // Scan backwards for 2 values.

	191 int index = static_cast<int>(bytes_.size());

	192 EncodeScanBackwards(bytes_, &index);

	193 EncodeScanBackwards(bytes_, &index);

	194

	195 // Read from index to adjust previous_* values, but throw away all other

	196 // info.

	197 {

	198 int tmp_index = index;

	199 bool tmp_type;

	200 previous_bytecode_offset_ -=

	201 DecodeTypeValue(&*bytes_.begin(), &tmp_index, &tmp_type);

	202 previous_source_position_ -= DecodeValue(&*bytes_.begin(), &tmp_index);

	203 }

	204

	205 bytes_.resize(index);

	206

	207 #ifdef ENABLE_SLOW_DCHECKS

	208 raw_entries_.pop_back();

	209 #endif

	210 }

46 }	211 }

47	212

48 Handle<FixedArray> SourcePositionTableBuilder::ToFixedArray() {	213 Handle<ByteArray> SourcePositionTableBuilder::ToSourcePositionTable() {

49 int length = static_cast<int>(entries_.size());	214 Handle<ByteArray> table = isolate_->factory()->NewByteArray(

50 Handle<FixedArray> table =	215 static_cast<int>(bytes_.size()), TENURED);

51 isolate_->factory()->NewFixedArray(length * 2, TENURED);	216 if (bytes_.empty()) return table;

52 for (int i = 0; i < length; i++) {	217

53 table->set(i * 2, Smi::FromInt(entries_[i].bytecode_offset));	218 MemCopy(table->GetDataStartAddress(), &*bytes_.begin(), bytes_.size());

54 table->set(i * 2 + 1, Smi::FromInt(entries_[i].source_position_and_type));	219

	220 #ifdef ENABLE_SLOW_DCHECKS

	221 // Brute force testing: Record all positions and decode

	222 // the entire table to verify they are identical.

	223 auto raw = raw_entries_.begin();

	224 for (SourcePositionTableIterator encoded(*table); !encoded.done();

	225 encoded.Advance(), raw++) {

	226 DCHECK(raw != raw_entries_.end());

	227 DCHECK_EQ(encoded.bytecode_offset(), raw->bytecode_offset);

	228 DCHECK_EQ(encoded.source_position(), raw->source_position);

	229 DCHECK_EQ(encoded.is_statement(), raw->is_statement);

55 }	230 }

	231 DCHECK(raw == raw_entries_.end());

	232 #endif

	233

56 return table;	234 return table;

57 }	235 }

58	236

59 SourcePositionTableIterator::SourcePositionTableIterator(	237 // SourcePositionTableIterator

60 BytecodeArray* bytecode_array)	238

61 : table_(bytecode_array->source_position_table()),	239 SourcePositionTableIterator::SourcePositionTableIterator(ByteArray* byte_array)

	240 : table_(byte_array),

62 index_(0),	241 index_(0),

63 length_(table_->length()) {	242 previous_bytecode_offset_(0),

64 DCHECK(table_->length() % 2 == 0);	243 previous_source_position_(0),

	244 current_({0, 0, false}) {

65 Advance();	245 Advance();

66 }	246 }

67	247

68 void SourcePositionTableIterator::Advance() {	248 void SourcePositionTableIterator::Advance() {

69 if (index_ < length_) {	249 DCHECK(!done());

70 int new_bytecode_offset = Smi::cast(table_->get(index_))->value();	250 DCHECK(index_ >= 0 && index_ <= table_->length());

71 // Bytecode offsets are in ascending order.	251 if (index_ == table_->length()) {

72 DCHECK(bytecode_offset_ < new_bytecode_offset \|\| index_ == 0);	252 index_ = kDone;

73 bytecode_offset_ = new_bytecode_offset;	253 } else {

74 uint32_t source_position_and_type =	254 byte* bytes = table_->GetDataStartAddress();

75 static_cast<uint32_t>(Smi::cast(table_->get(index_ + 1))->value());	255

76 is_statement_ = IsStatementField::decode(source_position_and_type);	256 previous_bytecode_offset_ +=

77 source_position_ = SourcePositionField::decode(source_position_and_type);	257 DecodeTypeValue(bytes, &index_, &(current_.is_statement));

	258 previous_source_position_ += DecodeValue(bytes, &index_);

	259

	260 current_.bytecode_offset = previous_bytecode_offset_;

	261 current_.source_position = previous_source_position_;

78 }	262 }

79 index_ += 2;

80 }	263 }

81	264

82 } // namespace interpreter	265 } // namespace interpreter

83 } // namespace internal	266 } // namespace internal

84 } // namespace v8	267 } // namespace v8

OLD	NEW

« no previous file with comments | « src/interpreter/source-position-table.h ('k') | src/objects.h » ('j') | no next file with comments »