src/interpreter/source-position-table.cc - Issue 1704943002: Encode interpreter::SourcePositionTable as variable-length ints.

Unified Diff: src/interpreter/source-position-table.cc

Issue 1704943002: Encode interpreter::SourcePositionTable as variable-length ints. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Fix iterator. Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« src/interpreter/source-position-table.h ('K') | « src/interpreter/source-position-table.h ('k') | src/objects.h » ('j') | test/unittests/interpreter/source-position-table-unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: src/interpreter/source-position-table.cc

diff --git a/src/interpreter/source-position-table.cc b/src/interpreter/source-position-table.cc

index 0b7c44e2d984a50eb8169af7daba6ce8af096854..2f1e4d65d49cba074185843a9b59d9a1faa7c740 100644

--- a/src/interpreter/source-position-table.cc

+++ b/src/interpreter/source-position-table.cc

@@ -4,7 +4,6 @@

#include "src/interpreter/source-position-table.h"

-#include "src/assembler.h"

#include "src/objects-inl.h"

#include "src/objects.h"

@@ -12,29 +11,215 @@ namespace v8 {

namespace internal {

namespace interpreter {

-class IsStatementField : public BitField<bool, 0, 1> {};

-class SourcePositionField : public BitField<int, 1, 30> {};

+// We'll use a simple encoding scheme to record the source positions.

+// Conceptually, each position consists of:

+// - bytecode_offset: An integer index into the BytecodeArray

+// - source_position: An integer index into the source string.

+// - position type: Each position is either a statement or an expression.

+//

+// The basic idea for the encoding is to use a variable-length integer coding,

+// where each byte contains 7 bits of payload data, and 1 'more' bit that

+// determines whether additional bytes follow. Additionally:

+// - we record the difference from the previous position,

+// - we just stuff one bit for the type into the bytecode offset,

+// - negative numbers occur only rarely, so we use a denormalized lead byte

+// (a lead byte with all zeros, which normally wouldn't make any sense)

+// to encode a negative sign, so that we 'pay' nothing for positive numbers,

+// but have to pay a full byte for negative integers.

+namespace {

+// We'll use the BitField<..> template to handle the actual encoding.

+// Most bytes will be a MoreBit w/ ValueBits. If we also want to encode

+// the type, we'll have a byte with MoreBit, TypeBit, and TypeValueBits,

+// followed by zero or more 'normal' bytes.

+class MoreBit : public BitField8<bool, 7, 1> {};

+class ValueBits : public BitField8<int, 0, 7> {};

+class TypeBit : public BitField8<bool, 6, 1> {};

Yang 2016/02/17 19:54:51 Since we always stuff the type bit into the byteco

vogelheim 2016/02/18 13:06:09 I disagree. Only the first byte is different; all

Yang 2016/02/19 08:28:02 I see. Thanks for the explanation.

+class TypeValueBits : public BitField8<int, 0, 6> {};

+// Helper: Number of bytes for given value in 'value' bytes encoding.

+int SizeOfValue(int value) {

+ DCHECK(value >= 0);

+ int size = 1;

+ while (value >= (1 << size * ValueBits::kSize)) size++;

Yang 2016/02/17 19:54:51 Let's use brackets around the multiplication.

vogelheim 2016/02/18 13:06:09 Done.

+ return size;

+// Helper: Number of bytes for given value (in 'type + value') bytes encoding.

+int SizeOfTypeValue(int value) {

+ DCHECK(value >= 0);

+ int size = 1;

+ while (value >= (1 << (TypeValueBits::kSize + (size - 1) * ValueBits::kSize)))

+ size++;

+ return size;

+// Helper: Encode one 'normal' byte.

+void EncodeByte(ZoneVector<byte>& bytes, bool more, int value) {

+ bytes.push_back(MoreBit::encode(more) |

+ ValueBits::encode(value & ValueBits::kMax));

+// Helper: Encode a byte w/ a flag value.

Yang 2016/02/17 19:54:51 Can we call this 'type' instead of 'flag' to be co

vogelheim 2016/02/18 13:06:09 Done.

+void EncodeFlagByte(ZoneVector<byte>& bytes, bool more, bool flag, int value) {

+ bytes.push_back(MoreBit::encode(more) | TypeBit::encode(flag) |

+ TypeValueBits::encode(value & TypeValueBits::kMax));

+// Helper: Encode a positive integer w/ the given size.

+void EncodeBytes(ZoneVector<byte>& bytes, int value, int size) {

+ DCHECK(value >= 0);

+ for (int i = 0; i < size; i++) {

+ int byte_no = size - i - 1;

+ EncodeByte(bytes, byte_no != 0, value >> (ValueBits::kSize * byte_no));

+ }

+// Encode an integer.

+void EncodeValue(ZoneVector<byte>& bytes, int value) {

+ if (value < 0) {

+ EncodeByte(bytes, true, 0);

+ value = -value;

Yang 2016/02/17 19:54:51 Let's assert that value is not kIntMin. Or else th

vogelheim 2016/02/18 13:06:09 Done.

+ }

+ EncodeBytes(bytes, value, SizeOfValue(value));

+// Encode an integer and a flag.

+void EncodeFlagValue(ZoneVector<byte>& bytes, bool flag, int value) {

+ if (value < 0) {

+ EncodeFlagByte(bytes, true, flag, 0);

+ EncodeFlagValue(bytes, false, -value);

+ } else {

+ int size = SizeOfTypeValue(value);

+ EncodeFlagByte(bytes, (size > 1), flag,

+ value >> (size - 1) * ValueBits::kSize);

+ EncodeBytes(bytes, value, size - 1);

+ }

+// Helper: Decode a series of 'value' bytes. Assume the current byte has

+// been decoded, but use it to decide whether we need more bytes.

+void DecodeBytes(const byte* bytes, int* index, int& value) {

+ byte current = bytes[(*index)++];

+ while (MoreBit::decode(current)) {

+ current = bytes[(*index)++];

+ value = (value << ValueBits::kSize) + ValueBits::decode(current);

+ }

+// Decode an integer at bytes.get(index). Inverse of EncodeValue.

+int DecodeValue(const byte* bytes, int* index) {

+ byte current = bytes[*index];

+ int val = ValueBits::decode(current);

+ bool sign = (val == 0);

+ DecodeBytes(bytes, index, val);

+ return sign ? -val : val;

+// Decode an integer and a flag at bytes.get(index). Inverse of

+// EncodeFlagValue.

+int DecodeFlagValue(const byte* bytes, int* index, bool* flag) {

+ int old_index = *index;

+ byte current = bytes[*index];

+ *flag = TypeBit::decode(current);

+ int val = TypeValueBits::decode(current);

+ DecodeBytes(bytes, index, val);

+ bool sign = (*index - old_index) > SizeOfTypeValue(val);

+ return sign ? -val : val;

+// Helper: Scan one value backwards.

+// *index is expected to point behind the current 'complete' value encoding

+// and will be set to point behind the previous one.

+void EncodeScanBackwards(ZoneVector<byte>& bytes, int* index) {

+ DCHECK(bytes.size() > 0);

+ DCHECK(*index > 0);

+ // Index is expected to point behind a 'complete' value encoding.

+ (*index)--;

+ DCHECK(!MoreBit::decode(bytes[*index]));

+ // Scan back until we either hit the beginning of our bytes, or another byte

+ // without 'more' bit.

+ while (*index > 0 && MoreBit::decode(bytes[(*index) - 1])) {

+ (*index)--;

+ }

+ DCHECK(*index >= 0);

+} // namespace

+// SourcePositionTableCodec

Yang 2016/02/17 19:54:51 I don't really see the value of having a standalon

vogelheim 2016/02/18 13:06:09 Done.

+SourcePositionTableCodec::SourcePositionTableCodec()

+ : previous_bytecode_offset_(0), previous_source_position_(0) {}

+SourcePositionTableCodec::~SourcePositionTableCodec() {}

+void SourcePositionTableCodec::Encode(ZoneVector<byte>& bytes,

+ PositionTableEntry entry) {

+ EncodeFlagValue(bytes, entry.is_statement,

+ entry.bytecode_offset - previous_bytecode_offset_);

+ EncodeValue(bytes, entry.source_position - previous_source_position_);

+ previous_bytecode_offset_ = entry.bytecode_offset;

+ previous_source_position_ = entry.source_position;

+void SourcePositionTableCodec::EncodeRevertPosition(ZoneVector<byte>& bytes,

+ int bytecode_offset) {

+ if (bytes.size() == 0 || bytecode_offset != previous_bytecode_offset_) return;

+ // Reverting means we need to discard 2 variable length ints. We also need

+ // to read them, in order to fix up the previous_*_ member variables.

+ // Scan backwards for 2 values.

+ int index = static_cast<int>(bytes.size());

+ EncodeScanBackwards(bytes, &index);

+ // Read from index to adjust previous_* values, but throw away all other

+ // info.

+ {

+ int tmp_index = index;

+ bool tmp_flag;

+ previous_bytecode_offset_ -=

+ DecodeFlagValue(&*bytes.begin(), &tmp_index, &tmp_flag);

+ previous_source_position_ -= DecodeValue(&*bytes.begin(), &tmp_index);

+ }

+ bytes.resize(index);

+void SourcePositionTableCodec::Decode(const byte* bytes, int* index,

+ PositionTableEntry* entry) {

+ previous_bytecode_offset_ +=

+ DecodeFlagValue(bytes, index, &(entry->is_statement));

+ previous_source_position_ += DecodeValue(bytes, index);

+ entry->bytecode_offset = previous_bytecode_offset_;

+ entry->source_position = previous_source_position_;

+// SourcePositionTableBuilder

void SourcePositionTableBuilder::AddStatementPosition(size_t bytecode_offset,

int source_position) {

- int offset = static_cast<int>(bytecode_offset);

- // If a position has already been assigned to this bytecode offset,

- // do not reassign a new statement position.

- if (CodeOffsetHasPosition(offset)) return;

- uint32_t encoded = IsStatementField::encode(true) |

- SourcePositionField::encode(source_position);

- entries_.push_back({offset, encoded});

+ AddEntry({static_cast<int>(bytecode_offset), source_position, true});

}

void SourcePositionTableBuilder::AddExpressionPosition(size_t bytecode_offset,

int source_position) {

- int offset = static_cast<int>(bytecode_offset);

- // If a position has already been assigned to this bytecode offset,

- // do not reassign a new statement position.

- if (CodeOffsetHasPosition(offset)) return;

- uint32_t encoded = IsStatementField::encode(false) |

- SourcePositionField::encode(source_position);

- entries_.push_back({offset, encoded});

+ AddEntry({static_cast<int>(bytecode_offset), source_position, false});

+void SourcePositionTableBuilder::AddEntry(const PositionTableEntry& entry) {

+ if (!CodeOffsetHasPosition(entry.bytecode_offset)) {

+ codec_.Encode(bytes_, entry);

+#ifdef ENABLE_SLOW_DCHECKS

+ raw_entries_.push_back(entry);

+#endif

+ }

}

void SourcePositionTableBuilder::RevertPosition(size_t bytecode_offset) {

@@ -42,41 +227,57 @@ void SourcePositionTableBuilder::RevertPosition(size_t bytecode_offset) {

// If we already added a source position table entry, but the bytecode array

// builder ended up not outputting a bytecode for the corresponding bytecode

// offset, we have to remove that entry.

- if (CodeOffsetHasPosition(offset)) entries_.pop_back();

+ if (CodeOffsetHasPosition(offset)) {

+ codec_.EncodeRevertPosition(bytes_, offset);

+#ifdef ENABLE_SLOW_DCHECKS

+ raw_entries_.pop_back();

+#endif

+ }

}

-Handle<FixedArray> SourcePositionTableBuilder::ToFixedArray() {

- int length = static_cast<int>(entries_.size());

- Handle<FixedArray> table =

- isolate_->factory()->NewFixedArray(length * 2, TENURED);

- for (int i = 0; i < length; i++) {

- table->set(i * 2, Smi::FromInt(entries_[i].bytecode_offset));

- table->set(i * 2 + 1, Smi::FromInt(entries_[i].source_position_and_type));

+Handle<ByteArray> SourcePositionTableBuilder::ToSourcePositionTable() {

+ Handle<ByteArray> table = isolate_->factory()->NewByteArray(

+ static_cast<int>(bytes_.size()), TENURED);

+ if (bytes_.empty()) return table;

+ MemCopy(table->GetDataStartAddress(), &*bytes_.begin(), bytes_.size());

+#ifdef ENABLE_SLOW_DCHECKS

+ // Brute force testing: Record all positions and decode

+ // the entire table to verify they are identical.

+ SourcePositionTableIterator encoded(*table);

+ auto raw = raw_entries_.begin();

+ for (; !encoded.done(); encoded.Advance(), raw++) {

+ DCHECK(raw != raw_entries_.end());

+ DCHECK_EQ(encoded.bytecode_offset(), raw->bytecode_offset);

+ DCHECK_EQ(encoded.source_position(), raw->source_position);

+ DCHECK_EQ(encoded.is_statement(), raw->is_statement);

}

+ DCHECK(raw == raw_entries_.end());

+#endif

return table;

}

+// SourcePositionTableIterator

SourcePositionTableIterator::SourcePositionTableIterator(

BytecodeArray* bytecode_array)

- : table_(bytecode_array->source_position_table()),

- index_(0),

- length_(table_->length()) {

- DCHECK(table_->length() % 2 == 0);

+ : SourcePositionTableIterator(bytecode_array->source_position_table()) {}

+SourcePositionTableIterator::SourcePositionTableIterator(ByteArray* byte_array)

+ : table_(byte_array), index_(0), current_({0, 0, false}) {

Advance();

}

void SourcePositionTableIterator::Advance() {

- if (index_ < length_) {

- int new_bytecode_offset = Smi::cast(table_->get(index_))->value();

- // Bytecode offsets are in ascending order.

- DCHECK(bytecode_offset_ < new_bytecode_offset || index_ == 0);

- bytecode_offset_ = new_bytecode_offset;

- uint32_t source_position_and_type =

- static_cast<uint32_t>(Smi::cast(table_->get(index_ + 1))->value());

- is_statement_ = IsStatementField::decode(source_position_and_type);

- source_position_ = SourcePositionField::decode(source_position_and_type);

+ DCHECK(!done());

+ DCHECK(index_ >= 0 && index_ <= table_->length());

+ if (index_ == table_->length()) {

+ index_ = kDone;

+ } else {

+ codec_.Decode(table_->GetDataStartAddress(), &index_, &current_);

}

- index_ += 2;

}

} // namespace interpreter

« src/interpreter/source-position-table.h ('K') | « src/interpreter/source-position-table.h ('k') | src/objects.h » ('j') | test/unittests/interpreter/source-position-table-unittest.cc » ('J')