src/api.cc - Issue 9689065: Benchmarkify pointer swapping string encoder

Unified Diff: src/api.cc

Issue 9689065: Benchmarkify pointer swapping string encoder

Patch Set: Created 8 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/api.cc

diff --git a/src/api.cc b/src/api.cc

index 27d40867baa3a7daa54b7913598f56e54871709e..cfa319388c0cfae79dfcba6abebbba872621f63c 100644

--- a/src/api.cc

+++ b/src/api.cc

@@ -3687,6 +3687,242 @@ int String::Length() const {

}

+/**

+ * Provides direct read access to string memory. The user has to be aware that

+ * each buffer returned might contain either 8-bit or 16-bit characters. As

+ * long as the iterator exists no other interaction with the v8 heap is

+ * allowed, because the heap might be in inconsistent state.

+ *

+ * Intended usage:
+ *   for (String::ReadMemory it(string); *it; it.Next()) {
+ *     switch (it.storage_type()) {
+ *       case String::ReadMemory::kAscii:
+ *         // Process ascii piece here.
+ *         break;
+ *       case String::ReadMemory::kTwoByte:
+ *         // Process ucs-2 piece here.
+ *         break;
+ *     }
+ *   }

+ */

+// Iterator over the flat pieces of a string. Each step exposes a pointer to
+// the piece's characters, the piece's length and whether the characters are
+// 8-bit (kAscii) or 16-bit (kTwoByte).
+class ReadMemory {
+  // Low bit set in a saved parent link when the child was entered through the
+  // parent's `second` field.
+  static const int kCurrentIsSecondTag = 1;
+  // Number of parent links kept in parents_. Deeper levels store the link in
+  // the cons string's own first/second field instead (see push_parent).
+  static const int kParentStackSize = 1024;
+
+ public:
+  enum StorageType {
+    kNone = 0,
+    kAscii = 1,
+    kTwoByte = 2
+  };
+
+  explicit ReadMemory(i::Handle<i::String> obj);
+
+  // If iteration stopped before the end, walk back to the root so that every
+  // field overwritten by push_parent is restored.
+  ~ReadMemory() {
+    if (ptr_ != NULL) {
+      rewind();
+    }
+  }
+
+  // Pointer to the current piece's characters; NULL once iteration is done.
+  const void* operator*() { return ptr_; }
+  // Length of the current piece, as reported by String::length().
+  int length() { return length_; }
+  // Character width of the current piece; kNone once iteration is done.
+  StorageType storage_type() { return storage_type_; }
+
+  // Advances to the next piece. Returns false when there are no more pieces.
+  bool Next() {
+    if (ptr_ != NULL) {
+      next();
+    }
+    return ptr_ != NULL;
+  }
+
+ private:
+  void next();
+  void rewind();
+  void down();
+  void set_flat(v8::internal::String* flat);
+  void set_end();
+  void push_parent(bool second);
+  void pop_parent();
+
+  const void* ptr_;
+  int length_;
+  StorageType storage_type_;
+  // Cons string whose children are currently being visited.
+  v8::internal::ConsString* current_;
+  // Untagged address of current_'s parent, with kCurrentIsSecondTag or'ed in
+  // when current_ is the parent's second child.
+  intptr_t parent_;
+  // True once the second child of current_ has been (or is being) visited.
+  bool did_visit_second_;
+  // Number of parent links pushed so far.
+  int depth_;
+  intptr_t parents_[kParentStackSize];
+
+  // Disallow copying and assigning.
+  ReadMemory(const ReadMemory&);
+  void operator=(const ReadMemory&);
+};

+enum ParseMode {  // Selects how emit() treats the destination buffer.

+ kComputeLength,  // emit() only advances the output position; nothing is written.

+ kCopyUnchecked,  // emit() writes the bytes without checking the remaining space.

+ kCopyChecked  // emit() returns false when fewer than `count` bytes remain.

+};

+// Appends `count` (1..4) output units, taken from c0..c3 in order, at
+// dest_pos and advances dest_pos past them.
+//   kComputeLength: nothing is written, dest_pos is only advanced.
+//   kCopyUnchecked: the units are written without looking at dest_end.
+//   kCopyChecked:   returns false, leaving dest_pos untouched, when fewer
+//                   than `count` units remain before dest_end.
+// Returns true in every other case.
+template <ParseMode mode, int count, typename T, typename C>
+static inline bool emit(T*& dest_pos, T const* dest_end,
+                        C c0, C c1 = 0, C c2 = 0, C c3 = 0) {
+  ASSERT(count >= 1 && count <= 4);
+  if (mode == kCopyChecked && dest_end - dest_pos < count) {
+    return false;
+  }
+  if (mode == kComputeLength) {
+    dest_pos += count;
+  } else {
+    *(dest_pos++) = static_cast<T>(c0);
+    if (count >= 2) *(dest_pos++) = static_cast<T>(c1);
+    if (count >= 3) *(dest_pos++) = static_cast<T>(c2);
+    if (count >= 4) *(dest_pos++) = static_cast<T>(c3);
+  }
+  return true;
+}

+// Encodes `value` as UTF-8, walking its flat pieces with ReadMemory.
+//
+//   mode       kComputeLength only measures, kCopyUnchecked writes without
+//              bounds checks, kCopyChecked stops as soon as the next
+//              character would not fit into dest_size bytes.
+//   dest       output buffer (only used for pointer arithmetic when
+//              measuring).
+//   dest_size  capacity of dest in bytes; only consulted in kCopyChecked.
+//   nchars     receives the number of UTF-16 code units whose encoding was
+//              produced.
+//
+// Returns the number of bytes produced. A lead surrogate (0xd800..0xdbff)
+// followed by a trail surrogate (0xdc00..0xdfff) becomes one 4-byte sequence,
+// even when the two halves sit in different pieces. Any unpaired surrogate
+// becomes U+FFFD (ef bf bd).
+template <ParseMode mode>
+static inline int string_to_utf8(i::Handle<i::String> value, char* dest,
+                                 int dest_size, int* nchars) {
+#define EMIT(n, ...)                                                    \
+  do {                                                                  \
+    if (!emit<mode, n>(dest_pos, dest_end, __VA_ARGS__)) { goto out; }  \
+  } while (0)
+
+  char* dest_pos = dest;
+  // dest_end is only read in kCopyChecked mode; avoid forming dest - 1 when
+  // the caller passes -1 for "no limit".
+  char* dest_end = (mode == kCopyChecked) ? dest + dest_size : dest;
+  // Lead surrogate that ended the previous piece and still waits for its
+  // trail; 0 when there is none. It is not counted in *nchars until resolved.
+  uint16_t lead_surrogate = 0;
+  // Bounds of the two-byte piece in progress. Kept at function scope so the
+  // `out` label can account for a piece that was cut short.
+  const uint16_t* src = NULL;
+  const uint16_t* src_pos = NULL;
+  *nchars = 0;
+
+  for (ReadMemory it(value); *it; it.Next()) {
+    switch (it.storage_type()) {
+      case ReadMemory::kAscii: {
+        // An ascii piece cannot complete a surrogate pair: replace the
+        // pending lead. emit() handles kComputeLength itself, so the three
+        // bytes are counted in every mode.
+        if (lead_surrogate) {
+          EMIT(3, 0xef, 0xbf, 0xbd);
+          lead_surrogate = 0;
+          ++*nchars;
+        }
+        int tocopy = it.length();
+        if (mode == kCopyChecked && tocopy > (dest_end - dest_pos)) {
+          tocopy = static_cast<int>(dest_end - dest_pos);
+          if (tocopy == 0) {
+            goto out;
+          }
+        }
+        // memcpy has no alignment requirement, unlike word-sized stores.
+        if (mode != kComputeLength) {
+          memcpy(dest_pos, *it, tocopy);
+        }
+        dest_pos += tocopy;
+        *nchars += tocopy;
+        break;
+      }
+
+      case ReadMemory::kTwoByte: {
+        src = reinterpret_cast<const uint16_t*>(*it);
+        src_pos = src;
+        const uint16_t* src_end = src + it.length();
+
+        // Resolve a lead surrogate left over from the previous piece.
+        if (lead_surrogate && src_pos < src_end) {
+          uint16_t c = *src_pos;
+          if (c >= 0xdc00 && c <= 0xdfff) {
+            uint32_t cp = 0x10000 + ((lead_surrogate - 0xd800) << 10) +
+                          (c - 0xdc00);
+            ASSERT(cp >= 0x10000 && cp <= 0x10ffff);
+            EMIT(4,
+                 0xf0 | (cp >> 18),
+                 0x80 | ((cp >> 12) & 0x3f),
+                 0x80 | ((cp >> 6) & 0x3f),
+                 0x80 | (cp & 0x3f));
+            src_pos++;  // The trail surrogate has been consumed.
+          } else {
+            // Not a trail: replace the lead and let the loop below encode c.
+            EMIT(3, 0xef, 0xbf, 0xbd);
+          }
+          lead_surrogate = 0;
+          ++*nchars;  // The held-over lead is now accounted for.
+        }
+
+        for ( ; src_pos < src_end; src_pos++) {
+          uint16_t c = *src_pos;
+          if (c < 0x80) {
+            EMIT(1, c);
+          } else if (c < 0x800) {
+            EMIT(2,
+                 0xc0 | (c >> 6),
+                 0x80 | (c & 0x3f));
+          } else if (c < 0xd800 || c > 0xdfff) {
+            EMIT(3,
+                 0xe0 | (c >> 12),
+                 0x80 | ((c >> 6) & 0x3f),
+                 0x80 | (c & 0x3f));
+          } else if (c < 0xdc00) {
+            // Lead surrogate. Try to grab the trail right away so the
+            // lead_surrogate test stays outside of this loop.
+            if (src_pos + 1 < src_end) {
+              uint16_t c2 = *(src_pos + 1);
+              if (c2 >= 0xdc00 && c2 <= 0xdfff) {
+                uint32_t cp = 0x10000 + ((c - 0xd800) << 10) + (c2 - 0xdc00);
+                ASSERT(cp >= 0x10000 && cp <= 0x10ffff);
+                EMIT(4,
+                     0xf0 | (cp >> 18),
+                     0x80 | ((cp >> 12) & 0x3f),
+                     0x80 | ((cp >> 6) & 0x3f),
+                     0x80 | (cp & 0x3f));
+                src_pos++;
+              } else {
+                // Lead without a trail; c2 is encoded by the next iteration.
+                EMIT(3, 0xef, 0xbf, 0xbd);
+              }
+            } else {
+              // Last unit of the piece: the trail may open the next piece.
+              // Nothing has been written for it yet, so cancel the count
+              // that the piece total below would otherwise add.
+              lead_surrogate = c;
+              --*nchars;
+            }
+          } else {
+            // Trail surrogate without a preceding lead.
+            EMIT(3, 0xef, 0xbf, 0xbd);
+          }
+        }
+        *nchars += static_cast<int>(src_pos - src);
+        src = NULL;
+        src_pos = NULL;
+        break;
+      }
+
+      default:
+        UNREACHABLE();
+    }
+  }
+
+  // The string ended on a lead surrogate.
+  if (lead_surrogate) {
+    EMIT(3, 0xef, 0xbf, 0xbd);
+    ++*nchars;
+  }
+
+out:
+  // Reached early when the output is full: count the units of the two-byte
+  // piece that were encoded before running out of space.
+  if (src != NULL) {
+    *nchars += static_cast<int>(src_pos - src);
+  }
+  return static_cast<int>(dest_pos - dest);
+#undef EMIT
+}

int String::Utf8Length() const {

i::Handle<i::String> str = Utils::OpenHandle(this);

if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0;

@@ -3703,6 +3939,7 @@ int String::WriteUtf8(char* buffer,

LOG_API(isolate, "String::WriteUtf8");

ENTER_V8(isolate);

i::Handle<i::String> str = Utils::OpenHandle(this);

if (str->IsAsciiRepresentation()) {

int len;

if (capacity == -1) {

@@ -3720,74 +3957,24 @@ int String::WriteUtf8(char* buffer,

return len;

}

- i::StringInputBuffer& write_input_buffer = *isolate->write_input_buffer();

- isolate->string_tracker()->RecordWrite(str);

- if (options & HINT_MANY_WRITES_EXPECTED) {

- // Flatten the string for efficiency. This applies whether we are

- // using StringInputBuffer or Get(i) to access the characters.

- FlattenString(str);

- }

- write_input_buffer.Reset(0, *str);

- int len = str->length();

- // Encode the first K - 3 bytes directly into the buffer since we

- // know there's room for them. If no capacity is given we copy all

- // of them here.

- int fast_end = capacity - (unibrow::Utf8::kMaxEncodedSize - 1);

- int i;

- int pos = 0;

- int nchars = 0;

- int previous = unibrow::Utf16::kNoPreviousCharacter;

- for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {

- i::uc32 c = write_input_buffer.GetNext();

- int written = unibrow::Utf8::Encode(buffer + pos, c, previous);

- pos += written;

- nchars++;

- previous = c;

- }

- if (i < len) {

- // For the last characters we need to check the length for each one

- // because they may be longer than the remaining space in the

- // buffer.

- char intermediate[unibrow::Utf8::kMaxEncodedSize];

- for (; i < len && pos < capacity; i++) {

- i::uc32 c = write_input_buffer.GetNext();

- if (unibrow::Utf16::IsTrailSurrogate(c) &&

- unibrow::Utf16::IsLeadSurrogate(previous)) {

- // We can't use the intermediate buffer here because the encoding

- // of surrogate pairs is done under assumption that you can step

- // back and fix the UTF8 stream. Luckily we only need space for one

- // more byte, so there is always space.

- ASSERT(pos < capacity);

- int written = unibrow::Utf8::Encode(buffer + pos, c, previous);

- ASSERT(written == 1);

- pos += written;

- nchars++;

- } else {

- int written =

- unibrow::Utf8::Encode(intermediate,

- c,

- unibrow::Utf16::kNoPreviousCharacter);

- if (pos + written <= capacity) {

- for (int j = 0; j < written; j++)

- buffer[pos + j] = intermediate[j];

- pos += written;

- nchars++;

- } else {

- // We've reached the end of the buffer

- break;

- }

- previous = c;

- }

+ int pos, nchars;

+ if (capacity == -1 || capacity >= str->length() * 3) {

+ pos = string_to_utf8<kCopyUnchecked>(str, buffer, -1, &nchars);

+ } else {

+ pos = string_to_utf8<kCopyChecked>(str, buffer, capacity, &nchars);

}

if (nchars_ref != NULL) *nchars_ref = nchars;

if (!(options & NO_NULL_TERMINATION) &&

- (i == len && (capacity == -1 || pos < capacity)))

+ (nchars == str->length() && (capacity == -1 || pos < capacity)))

buffer[pos++] = '\0';

return pos;

}

int String::WriteAscii(char* buffer,

int start,

int length,

@@ -5312,6 +5499,189 @@ String::Value::~Value() {

i::DeleteArray(str_);

}

+// Positions the iterator on the first flat piece of `str`. Every scalar
+// member starts in a defined state, so the first push_parent() does not save
+// an indeterminate parent_ value.
+ReadMemory::ReadMemory(i::Handle<i::String> str)
+    : ptr_(NULL),
+      length_(0),
+      storage_type_(kNone),
+      current_(NULL),
+      parent_(0),
+      did_visit_second_(false),
+      depth_(0) {
+  i::String* istr = *str;
+  if (!i::StringShape(istr).IsCons()) {
+    // Fast case - no need to iterate. Marking the second child as visited
+    // makes the first next() call end the iteration.
+    did_visit_second_ = true;
+    set_flat(istr);
+  } else {
+    current_ = i::ConsString::cast(istr);
+    down();
+  }
+}

+// MSVC decides not to inline some functions but forcing it to do so saves
+// valuable cycles. Therefore I'm forcing inlining here - hopefully the v8
+// team will not come and bomb my house.
+// The strong_inline declaration should probably move to another file.
+//
+// The GCC attribute is paired with `inline`, matching the other two
+// branches: without it GCC warns that an always_inline function might not be
+// inlinable.
+#if defined(_MSC_VER)
+#define strong_inline __forceinline
+#elif defined(__GNUC__)
+#define strong_inline inline __attribute__((always_inline))
+#else
+#define strong_inline inline
+#endif

+// Moves current_ one level up to its parent and reloads parent_ with the
+// grandparent link. The tag bit in parent_ tells whether we return from the
+// parent's first or second child. Links deeper than kParentStackSize were
+// stored in that child field by push_parent(); here the link is read back
+// and the field is restored to point at the child again.
+strong_inline void ReadMemory::pop_parent() {
+  i::String* child = current_;
+  if (!(parent_ & kCurrentIsSecondTag)) {
+    // Moving up on the left hand side.
+    current_ = reinterpret_cast<i::ConsString*>(parent_ + i::kHeapObjectTag);
+    did_visit_second_ = false;
+  } else {
+    // Moving up on the right hand side.
+    current_ = reinterpret_cast<i::ConsString*>(parent_ - kCurrentIsSecondTag +
+                                                i::kHeapObjectTag);
+    did_visit_second_ = true;
+  }
+  if (--depth_ < kParentStackSize) {
+    parent_ = parents_[depth_];
+  } else if (!did_visit_second_) {
+    parent_ = reinterpret_cast<intptr_t>(current_->unchecked_first());
+    current_->set_first(child, i::SKIP_WRITE_BARRIER);
+  } else {
+    parent_ = reinterpret_cast<intptr_t>(current_->unchecked_second());
+    current_->set_second(child, i::SKIP_WRITE_BARRIER);
+  }
+}

+// Records current_ as the parent of the child about to be entered; `second`
+// says whether that child is current_'s second field. The previous parent_
+// is saved in parents_ while there is room. Beyond kParentStackSize it is
+// written into the child field being followed, which pop_parent() restores
+// on the way back up.
+strong_inline void ReadMemory::push_parent(bool second) {
+  if (second && depth_ == 0) {
+    // Optimization: no need to ever go back.
+    return;
+  }
+  if (depth_ < kParentStackSize) {
+    parents_[depth_] = parent_;
+  } else if (!second) {
+    current_->set_first(reinterpret_cast<i::String*>(parent_),
+                        i::SKIP_WRITE_BARRIER);
+  } else {
+    current_->set_second(reinterpret_cast<i::String*>(parent_),
+                         i::SKIP_WRITE_BARRIER);
+  }
+  // parent_ holds the untagged address of current_; the low bit marks a
+  // descent through `second`.
+  if (!second) {
+    parent_ = reinterpret_cast<intptr_t>(current_) - i::kHeapObjectTag;
+  } else {
+    parent_ = reinterpret_cast<intptr_t>(current_) - i::kHeapObjectTag +
+              kCurrentIsSecondTag;
+  }
+  depth_++;
+}

+// Iterates back up to the root, popping every saved parent link so that any
+// first/second field overwritten by push_parent() is restored.
+void ReadMemory::rewind() {
+  while (depth_ > 0) {
+    pop_parent();
+  }
+}

+// Follows the chain of `first` children from current_ until a non-cons
+// string is reached, and makes that string the current piece.
+inline void ReadMemory::down() {
+  i::String* child = current_->first();
+  while (i::StringShape(child).IsCons()) {
+    push_parent(false);
+    current_ = i::ConsString::cast(child);
+    child = current_->first();
+  }
+  // The second child of current_ is still to be visited.
+  did_visit_second_ = false;
+  set_flat(child);
+}

+// Advances to the next flat piece, or marks the iteration as finished when
+// every piece has been visited.
+void ReadMemory::next() {
+  // Iterate upward until we reach a branch whose right hand side we didn't
+  // visit yet.
+  while (did_visit_second_) {
+    // When we reach the top then bail out.
+    if (depth_ == 0) {
+      set_end();
+      return;
+    }
+    pop_parent();
+  }
+  i::String* child = current_->second();
+  if (i::StringShape(child).IsCons()) {
+    // The right hand side is itself a tree: enter it and go to its leftmost
+    // piece.
+    push_parent(true);
+    current_ = i::ConsString::cast(child);
+    down();
+  } else {
+    did_visit_second_ = true;
+    set_flat(child);
+  }
+}

+// Makes `string` the current piece by setting ptr_, length_ and
+// storage_type_. For a sliced string the characters come from the slice's
+// parent, starting at the slice's offset. The string that actually holds the
+// characters must be sequential or external.
+strong_inline void ReadMemory::set_flat(i::String* string) {
+  // Unfortunately String::GetFlatContent is not really inline-friendly.
+  i::String* flat = string;
+  int offset = 0;
+  if (i::StringShape(string).representation_tag() == i::kSlicedStringTag) {
+    i::SlicedString* slice = i::SlicedString::cast(string);
+    flat = slice->parent();
+    offset = slice->offset();
+  }
+  i::StringShape shape(flat);
+  // The piece's length is that of the string itself, not of a slice parent.
+  length_ = string->length();
+  if (shape.encoding_tag() == i::kAsciiStringTag) {
+    storage_type_ = kAscii;
+    if (shape.representation_tag() == i::kSeqStringTag) {
+      ptr_ = i::SeqAsciiString::cast(flat)->GetChars() + offset;
+    } else {
+      ASSERT(shape.representation_tag() == i::kExternalStringTag);
+      ptr_ = i::ExternalAsciiString::cast(flat)->GetChars() + offset;
+    }
+  } else {
+    ASSERT(shape.encoding_tag() == i::kTwoByteStringTag);
+    storage_type_ = kTwoByte;
+    if (shape.representation_tag() == i::kSeqStringTag) {
+      ptr_ = i::SeqTwoByteString::cast(flat)->GetChars() + offset;
+    } else {
+      ASSERT(shape.representation_tag() == i::kExternalStringTag);
+      ptr_ = i::ExternalTwoByteString::cast(flat)->GetChars() + offset;
+    }
+  }
+}

+// Marks the iteration as finished: operator* yields NULL, length() yields 0
+// and storage_type() yields kNone from now on.
+strong_inline void ReadMemory::set_end() {
+  ptr_ = NULL;
+  length_ = 0;
+  storage_type_ = kNone;
+}

Local<Value> Exception::RangeError(v8::Handle<v8::String> raw_message) {

i::Isolate* isolate = i::Isolate::Current();

LOG_API(isolate, "RangeError");

« no previous file with comments | « include/v8.h ('k') | no next file » | no next file with comments »