Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(88)

Unified Diff: src/api.cc

Issue 9689065: Benchmarkify pointer swapping string encoder
Patch Set: Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « include/v8.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/api.cc
diff --git a/src/api.cc b/src/api.cc
index 27d40867baa3a7daa54b7913598f56e54871709e..cfa319388c0cfae79dfcba6abebbba872621f63c 100644
--- a/src/api.cc
+++ b/src/api.cc
@@ -3687,6 +3687,242 @@ int String::Length() const {
}
+/**
+ * Provides direct read access to string memory. The user has to be aware that
+ * each buffer returned might contain either 8-bit or 16-bit characters. As
+ * long as the iterator exists no other interaction with the v8 heap is
+ * allowed, because the heap might be in inconsistent state.
+ *
+ * Intended usage:
+ * for (String::ReadMemory it(string); *it; it.Next()) {
+ * switch (it.storage_type()) {
+ * case String::ReadMemory::kAscii:
+ * // Process ascii piece here.
+ * break;
+ * case String::ReadMemory::kTwoByte:
+ * // Process ucs-2 piece here.
+ * break;
+ * }
+ * }
+ */
+class ReadMemory {
+ // Low bit added to parent_ when the current node hangs off its parent's
+ // second field (set in push_parent(), tested in pop_parent()).
+ static const int kCurrentIsSecondTag = 1;
+ // Number of slots in parents_. Once depth_ reaches this value,
+ // push_parent() keeps the parent link in the cons string's own first/second
+ // field instead, and pop_parent() puts the original child pointer back.
+ static const int kParentStackSize = 1024;
+
+ public:
+ // Kind of characters in the current piece; kNone after set_end() has run.
+ enum StorageType {
+ kNone = 0,
+ kAscii = 1,
+ kTwoByte = 2
+ };
+ explicit ReadMemory(i::Handle<i::String> obj);
+ // If iteration has not reached the end, unwinds the parent chain so that
+ // every link push_parent() may have overwritten is restored.
+ ~ReadMemory() {
+ if (ptr_ != NULL) {
+ rewind();
+ }
+ }
+ // Start of the current piece, or NULL once iteration has ended.
+ const void* operator*() { return ptr_; }
+ // Number of characters in the current piece (0 once iteration has ended).
+ int length() { return length_; }
+ StorageType storage_type() { return storage_type_; }
+ // Advances to the next piece unless iteration has already ended. Returns
+ // true when a piece is available afterwards.
+ bool Next() {
+ if (ptr_ != NULL) {
+ next();
+ }
+ return ptr_ != NULL;
+ }
+
+ private:
+ void next();
+ void rewind();
+ void down();
+ void set_flat(v8::internal::String* flat);
+ void set_end();
+ void push_parent(bool second);
+ void pop_parent();
+
+ // Characters of the current piece; NULL marks the end of iteration.
+ const void* ptr_;
+ int length_;
+ StorageType storage_type_;
+ // Cons string whose children are currently being visited.
+ v8::internal::ConsString* current_;
+ // Address of current_'s parent with the heap object tag removed, plus
+ // kCurrentIsSecondTag when current_ is that parent's second child.
+ intptr_t parent_;
+ // True once the second child of current_ has been handed out.
+ bool did_visit_second_;
+ // Number of parent links recorded by push_parent() and not yet popped.
+ int depth_;
+ // Saved parent_ values for the first kParentStackSize levels.
+ intptr_t parents_[kParentStackSize];
+
+ // Disallow copying and assigning.
+ ReadMemory(const ReadMemory&);
+ void operator=(const ReadMemory&);
+};
+
+
+// Selects what emit() and string_to_utf8() do with the encoded output.
+enum ParseMode {
+ // Only advance the output position; nothing is written.
+ kComputeLength,
+ // Write without comparing against the end of the destination.
+ kCopyUnchecked,
+ // Write only when the whole sequence fits before the end of the destination.
+ kCopyChecked
+};
+
+
+// Appends |count| output units (c0..c3, in that order) at |dest_pos| and
+// advances |dest_pos| past them.
+//   kComputeLength: only advances |dest_pos|, nothing is stored.
+//   kCopyChecked:   returns false, leaving |dest_pos| untouched, when fewer
+//                   than |count| units fit before |dest_end|.
+//   kCopyUnchecked: stores without looking at |dest_end|.
+// Returns true in every other case.
+template <ParseMode mode, int count, typename T, typename C>
+static inline bool emit(T*& dest_pos, T const* dest_end, C c0, C c1 = 0, C c2 = 0, C c3 = 0) {
+  ASSERT(count >= 1 && count <= 4);
+  if (mode == kCopyChecked) {
+    if (dest_end - dest_pos < count) return false;
+  }
+  if (mode == kComputeLength) {
+    dest_pos += count;
+    return true;
+  }
+  // |mode| and |count| are compile-time constants, so the tests below fold
+  // away and only the needed stores remain.
+  T* cursor = dest_pos;
+  *cursor++ = static_cast<T>(c0);
+  if (count > 1) *cursor++ = static_cast<T>(c1);
+  if (count > 2) *cursor++ = static_cast<T>(c2);
+  if (count > 3) *cursor++ = static_cast<T>(c3);
+  dest_pos = cursor;
+  return true;
+}
+
+
+// Encodes |value| as UTF-8 by walking its flat pieces with ReadMemory.
+//
+//   mode      kComputeLength: nothing is stored, only the byte count is
+//             computed. kCopyUnchecked: bytes are stored without bounds
+//             checks. kCopyChecked: encoding stops before the first sequence
+//             that does not fit completely into |dest_size| bytes.
+//   dest      Start of the output buffer.
+//   dest_size Size of |dest| in bytes; only read in kCopyChecked mode.
+//   nchars    Receives the number of UTF-16 code units of |value| whose
+//             encoding has been completely produced.
+//
+// Returns the number of bytes produced. A surrogate pair becomes one 4-byte
+// sequence, also when its two halves live in different pieces. Every unpaired
+// surrogate becomes U+FFFD (ef bf bd).
+template <ParseMode mode>
+static inline int string_to_utf8(i::Handle<i::String> value,
+                                 char* dest,
+                                 int dest_size,
+                                 int* nchars) {
+#define EMIT(n, ...) \
+  do { \
+    if (!emit<mode, n>(dest_pos, dest_end, __VA_ARGS__)) { goto out; } \
+  } while (0)
+
+  char* dest_pos = dest;
+  // Only the checked mode looks at dest_end. Do not form dest + dest_size in
+  // the other modes, where dest_size may be -1.
+  char* dest_end = (mode == kCopyChecked) ? dest + dest_size : dest;
+  // Lead surrogate that ended the previous piece and still waits for its
+  // trail surrogate; 0 when there is none.
+  uint16_t lead_surrogate = 0;
+  // Code units fully encoded so far. Kept in a local so that the count is
+  // also correct when EMIT bails out in the middle of a piece.
+  int units_done = 0;
+
+  for (ReadMemory it(value); *it; it.Next()) {
+    switch (it.storage_type()) {
+      case ReadMemory::kAscii: {
+        int tocopy = it.length();
+
+        // A pending lead surrogate cannot be completed by an ascii
+        // character. An empty piece leaves it pending so that a pair split
+        // around an empty piece still combines.
+        if (lead_surrogate && tocopy > 0) {
+          EMIT(3, 0xef, 0xbf, 0xbd);
+          units_done++;
+          lead_surrogate = 0;
+        }
+
+        if (mode == kCopyChecked && tocopy > dest_end - dest_pos) {
+          tocopy = static_cast<int>(dest_end - dest_pos);
+          if (tocopy == 0) {
+            goto out;
+          }
+        }
+        if (mode != kComputeLength) {
+          // memcpy has no alignment requirements, unlike word-sized stores
+          // through a casted char pointer.
+          memcpy(dest_pos, *it, tocopy);
+        }
+        dest_pos += tocopy;
+        units_done += tocopy;
+        break;
+      }
+
+      case ReadMemory::kTwoByte: {
+        const uint16_t* src_pos = reinterpret_cast<const uint16_t*>(*it);
+        const uint16_t* src_end = src_pos + it.length();
+
+        // Check if we were left with a lead surrogate from another piece.
+        if (lead_surrogate && src_pos < src_end) {
+          uint16_t c = *src_pos;
+          if (c >= 0xdc00 && c <= 0xdfff) {
+            // The first unit of this piece is the matching trail surrogate.
+            uint32_t cp = 0x10000 + ((lead_surrogate - 0xd800) << 10) +
+                          (c - 0xdc00);
+            ASSERT(cp >= 0x10000 && cp <= 0x10ffff);
+            EMIT(4,
+                 0xf0 | (cp >> 18),
+                 0x80 | ((cp >> 12) & 0x3f),
+                 0x80 | ((cp >> 6) & 0x3f),
+                 0x80 | (cp & 0x3f));
+            units_done += 2;
+            src_pos++;
+          } else {
+            // Unpaired lead. |c| is left for the loop below to encode.
+            EMIT(3, 0xef, 0xbf, 0xbd);
+            units_done++;
+          }
+          lead_surrogate = 0;
+        }
+
+        for ( ; src_pos < src_end; src_pos++) {
+          uint16_t c = *src_pos;
+          if (c < 0x80) {
+            EMIT(1, c);
+            units_done++;
+          } else if (c < 0x800) {
+            EMIT(2,
+                 0xc0 | (c >> 6),
+                 0x80 | (c & 0x3f));
+            units_done++;
+          } else if (c < 0xd800 || c > 0xdfff) {
+            EMIT(3,
+                 0xe0 | (c >> 12),
+                 0x80 | ((c >> 6) & 0x3f),
+                 0x80 | (c & 0x3f));
+            units_done++;
+          } else if (c < 0xdc00) {
+            // Lead surrogate (d800..dbff). Try to grab the trail surrogate
+            // immediately, so the pending-lead test stays outside the loop.
+            if (src_pos + 1 < src_end) {
+              uint16_t c2 = *(src_pos + 1);
+              if (c2 >= 0xdc00 && c2 <= 0xdfff) {
+                uint32_t cp = 0x10000 + ((c - 0xd800) << 10) +
+                              (c2 - 0xdc00);
+                ASSERT(cp >= 0x10000 && cp <= 0x10ffff);
+                EMIT(4,
+                     0xf0 | (cp >> 18),
+                     0x80 | ((cp >> 12) & 0x3f),
+                     0x80 | ((cp >> 6) & 0x3f),
+                     0x80 | (cp & 0x3f));
+                units_done += 2;
+                src_pos++;
+              } else {
+                // Unpaired lead. |c2| is encoded by the next iteration.
+                EMIT(3, 0xef, 0xbf, 0xbd);
+                units_done++;
+              }
+            } else {
+              // Last unit of the piece: the trail may open the next piece.
+              // It is counted once it has actually been encoded.
+              lead_surrogate = c;
+            }
+          } else {
+            // Trail surrogate (dc00..dfff) without a preceding lead.
+            EMIT(3, 0xef, 0xbf, 0xbd);
+            units_done++;
+          }
+        }
+        break;
+      }
+
+      default:
+        UNREACHABLE();
+    }
+  }
+  // The string ended on a lead surrogate that never got its trail.
+  if (lead_surrogate) {
+    EMIT(3, 0xef, 0xbf, 0xbd);
+    units_done++;
+  }
+out:
+  *nchars = units_done;
+  return static_cast<int>(dest_pos - dest);
+#undef EMIT
+}
+
+
int String::Utf8Length() const {
i::Handle<i::String> str = Utils::OpenHandle(this);
if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0;
@@ -3703,6 +3939,7 @@ int String::WriteUtf8(char* buffer,
LOG_API(isolate, "String::WriteUtf8");
ENTER_V8(isolate);
i::Handle<i::String> str = Utils::OpenHandle(this);
+
if (str->IsAsciiRepresentation()) {
int len;
if (capacity == -1) {
@@ -3720,74 +3957,24 @@ int String::WriteUtf8(char* buffer,
return len;
}
- i::StringInputBuffer& write_input_buffer = *isolate->write_input_buffer();
- isolate->string_tracker()->RecordWrite(str);
- if (options & HINT_MANY_WRITES_EXPECTED) {
- // Flatten the string for efficiency. This applies whether we are
- // using StringInputBuffer or Get(i) to access the characters.
- FlattenString(str);
- }
- write_input_buffer.Reset(0, *str);
- int len = str->length();
- // Encode the first K - 3 bytes directly into the buffer since we
- // know there's room for them. If no capacity is given we copy all
- // of them here.
- int fast_end = capacity - (unibrow::Utf8::kMaxEncodedSize - 1);
- int i;
- int pos = 0;
- int nchars = 0;
- int previous = unibrow::Utf16::kNoPreviousCharacter;
- for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {
- i::uc32 c = write_input_buffer.GetNext();
- int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
- pos += written;
- nchars++;
- previous = c;
- }
- if (i < len) {
- // For the last characters we need to check the length for each one
- // because they may be longer than the remaining space in the
- // buffer.
- char intermediate[unibrow::Utf8::kMaxEncodedSize];
- for (; i < len && pos < capacity; i++) {
- i::uc32 c = write_input_buffer.GetNext();
- if (unibrow::Utf16::IsTrailSurrogate(c) &&
- unibrow::Utf16::IsLeadSurrogate(previous)) {
- // We can't use the intermediate buffer here because the encoding
- // of surrogate pairs is done under assumption that you can step
- // back and fix the UTF8 stream. Luckily we only need space for one
- // more byte, so there is always space.
- ASSERT(pos < capacity);
- int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
- ASSERT(written == 1);
- pos += written;
- nchars++;
- } else {
- int written =
- unibrow::Utf8::Encode(intermediate,
- c,
- unibrow::Utf16::kNoPreviousCharacter);
- if (pos + written <= capacity) {
- for (int j = 0; j < written; j++)
- buffer[pos + j] = intermediate[j];
- pos += written;
- nchars++;
- } else {
- // We've reached the end of the buffer
- break;
- }
- }
- previous = c;
- }
+ int pos, nchars;
+ if (capacity == -1 || capacity >= str->length() * 3) {
+ pos = string_to_utf8<kCopyUnchecked>(str, buffer, -1, &nchars);
+ } else {
+ pos = string_to_utf8<kCopyChecked>(str, buffer, capacity, &nchars);
}
+
if (nchars_ref != NULL) *nchars_ref = nchars;
if (!(options & NO_NULL_TERMINATION) &&
- (i == len && (capacity == -1 || pos < capacity)))
+ (nchars == str->length() && (capacity == -1 || pos < capacity)))
buffer[pos++] = '\0';
return pos;
}
+
+
+
int String::WriteAscii(char* buffer,
int start,
int length,
@@ -5312,6 +5499,189 @@ String::Value::~Value() {
i::DeleteArray(str_);
}
+
+// Positions the iterator on the first flat piece of |str|.
+//
+// Every member is given a defined value up front: push_parent() copies
+// parent_ into parents_ before anything has been stored in it, and current_
+// is never assigned when |str| is not a cons string.
+ReadMemory::ReadMemory(i::Handle<i::String> str)
+    : ptr_(NULL),
+      length_(0),
+      storage_type_(kNone),
+      current_(NULL),
+      parent_(0),
+      did_visit_second_(false),
+      depth_(0) {
+  i::String* istr = *str;
+
+  if (!i::StringShape(istr).IsCons()) {
+    // Fast case - no need to iterate. Marking the second child as visited
+    // makes the first next() call end the iteration.
+    did_visit_second_ = true;
+    set_flat(istr);
+  } else {
+    current_ = i::ConsString::cast(istr);
+    down();
+  }
+}
+
+
+// MSVC decides not to inline some functions but forcing it to do so saves
+// valuable cycles. Therefore inlining is forced here - hopefully the v8
+// team will not object.
+// The strong_inline definition should probably move to another file.
+#if defined(_MSC_VER)
+#define strong_inline __forceinline
+#elif defined(__GNUC__)
+// GCC wants always_inline combined with the inline keyword; without it the
+// compiler warns that the function might not be inlinable.
+#define strong_inline inline __attribute__((always_inline))
+#else
+#define strong_inline inline
+#endif
+
+
+// Moves one level up: current_ becomes the cons string recorded in parent_,
+// and did_visit_second_ tells through which child we came back. The previous
+// parent_ is taken from parents_ or, beyond kParentStackSize levels, from the
+// child field that push_parent() borrowed; that field then gets its original
+// child pointer back.
+strong_inline void ReadMemory::pop_parent() {
+  // The node being left; needed to repair a borrowed child field.
+  i::String* const leaving = current_;
+  const bool came_from_second = (parent_ & kCurrentIsSecondTag) != 0;
+
+  if (came_from_second) {
+    // Moving up on the right hand side
+    current_ = reinterpret_cast<i::ConsString*>(
+        parent_ - kCurrentIsSecondTag + i::kHeapObjectTag);
+  } else {
+    // Moving up on the left hand side
+    current_ = reinterpret_cast<i::ConsString*>(parent_ + i::kHeapObjectTag);
+  }
+  did_visit_second_ = came_from_second;
+
+  --depth_;
+  if (depth_ < kParentStackSize) {
+    parent_ = parents_[depth_];
+    return;
+  }
+
+  // This level was linked through the heap object itself.
+  if (came_from_second) {
+    parent_ = reinterpret_cast<intptr_t>(current_->unchecked_second());
+    current_->set_second(leaving, i::SKIP_WRITE_BARRIER);
+  } else {
+    parent_ = reinterpret_cast<intptr_t>(current_->unchecked_first());
+    current_->set_first(leaving, i::SKIP_WRITE_BARRIER);
+  }
+}
+
+
+// Records current_ as the parent of the child about to be entered. |second|
+// says whether that child is current_'s second (true) or first (false)
+// field. The old parent_ goes into parents_ while there is room; beyond that
+// it is written into the child field being followed, which pop_parent()
+// restores later.
+strong_inline void ReadMemory::push_parent(bool second) {
+  // Optimization: descending into the second child at depth 0 never needs a
+  // way back.
+  if (depth_ == 0 && second) return;
+
+  if (depth_ >= kParentStackSize) {
+    i::String* const link = reinterpret_cast<i::String*>(parent_);
+    if (second) {
+      current_->set_second(link, i::SKIP_WRITE_BARRIER);
+    } else {
+      current_->set_first(link, i::SKIP_WRITE_BARRIER);
+    }
+  } else {
+    parents_[depth_] = parent_;
+  }
+
+  // Untagged address of current_, with the low bit marking "second child".
+  const intptr_t untagged =
+      reinterpret_cast<intptr_t>(current_) - i::kHeapObjectTag;
+  parent_ = second ? untagged + kCurrentIsSecondTag : untagged;
+  depth_++;
+}
+
+
+// Walks back up until depth_ is 0. Each pop_parent() call puts back a child
+// field that push_parent() may have overwritten on the way down.
+void ReadMemory::rewind() {
+  for (;;) {
+    if (depth_ <= 0) return;
+    pop_parent();
+  }
+}
+
+
+// Follows `first` links from current_ until the first child is no longer a
+// cons string, recording each cons string passed on the way, and makes that
+// child the current piece.
+inline void ReadMemory::down() {
+  i::String* leftmost = current_->first();
+  for ( ; i::StringShape(leftmost).IsCons(); leftmost = current_->first()) {
+    push_parent(false);
+    current_ = i::ConsString::cast(leftmost);
+  }
+  // Only the first child of current_ has been reached so far.
+  did_visit_second_ = false;
+  set_flat(leftmost);
+}
+
+
+// Advances to the next piece, or marks the end of the iteration.
+void ReadMemory::next() {
+  // Climb until a cons string is found whose second child is still unvisited.
+  // Running out of parents means the whole string has been handed out.
+  while (did_visit_second_) {
+    if (depth_ == 0) {
+      set_end();
+      return;
+    }
+    pop_parent();
+  }
+
+  i::String* right = current_->second();
+  if (!i::StringShape(right).IsCons()) {
+    // The second child is the next piece itself.
+    did_visit_second_ = true;
+    set_flat(right);
+    return;
+  }
+
+  // The second child is a cons string: enter it and go to its leftmost piece.
+  push_parent(true);
+  current_ = i::ConsString::cast(right);
+  down();
+}
+
+
+// Makes |string| (a non-cons string) the current piece: sets ptr_, length_
+// and storage_type_ from its characters. For a sliced string the characters
+// are read from the parent string, starting at the slice's offset.
+// String::GetFlatContent is not really inline-friendly, hence the manual
+// dispatch on the shape tags.
+strong_inline void ReadMemory::set_flat(i::String* string) {
+  // Find the string that stores the characters and where the piece starts.
+  i::String* backing = string;
+  int start = 0;
+  if (i::StringShape(string).representation_tag() == i::kSlicedStringTag) {
+    i::SlicedString* slice = i::SlicedString::cast(string);
+    backing = slice->parent();
+    start = slice->offset();
+    length_ = slice->length();
+  } else {
+    length_ = string->length();
+  }
+
+  i::StringShape backing_shape(backing);
+  const bool sequential =
+      backing_shape.representation_tag() == i::kSeqStringTag;
+
+  if (backing_shape.encoding_tag() == i::kAsciiStringTag) {
+    storage_type_ = kAscii;
+    if (sequential) {
+      ptr_ = i::SeqAsciiString::cast(backing)->GetChars() + start;
+    } else {
+      ASSERT(backing_shape.representation_tag() == i::kExternalStringTag);
+      ptr_ = i::ExternalAsciiString::cast(backing)->GetChars() + start;
+    }
+    return;
+  }
+
+  ASSERT(backing_shape.encoding_tag() == i::kTwoByteStringTag);
+  storage_type_ = kTwoByte;
+  if (sequential) {
+    ptr_ = i::SeqTwoByteString::cast(backing)->GetChars() + start;
+  } else {
+    ASSERT(backing_shape.representation_tag() == i::kExternalStringTag);
+    ptr_ = i::ExternalTwoByteString::cast(backing)->GetChars() + start;
+  }
+}
+
+
+// Marks the iteration as finished: there is no current piece any more, so
+// operator*() yields NULL, length() 0 and storage_type() kNone.
+strong_inline void ReadMemory::set_end() {
+  storage_type_ = kNone;
+  length_ = 0;
+  ptr_ = NULL;
+}
+
+
Local<Value> Exception::RangeError(v8::Handle<v8::String> raw_message) {
i::Isolate* isolate = i::Isolate::Current();
LOG_API(isolate, "RangeError");
« no previous file with comments | « include/v8.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698