| Index: src/api.cc
|
| diff --git a/src/api.cc b/src/api.cc
|
| index 27d40867baa3a7daa54b7913598f56e54871709e..cfa319388c0cfae79dfcba6abebbba872621f63c 100644
|
| --- a/src/api.cc
|
| +++ b/src/api.cc
|
| @@ -3687,6 +3687,242 @@ int String::Length() const {
|
| }
|
|
|
|
|
| +/**
|
| + * Provides direct read access to string memory. The user has to be aware that
|
| + * each buffer returned might contain either 8-bit or 16-bit characters. As
|
| + * long as the iterator exists no other interaction with the v8 heap is
|
| + * allowed, because the heap might be in inconsistent state.
|
| + *
|
| + * Intended usage:
|
| + * for (String::ReadMemory it(string); *it; it.Next()) {
|
| + * switch (it.storage_type()) {
|
| + * case String::ReadMemory::kAscii:
|
| + * // Process ascii piece here.
|
| + * break;
|
| + * case String::ReadMemory::kTwoByte:
|
| + * // Process ucs-2 piece here.
|
| + * break;
|
| + * }
|
| + * }
|
| + */
|
| +class ReadMemory {  // Iterates over the flat pieces of a string, giving raw access to their characters.
|
| + static const int kCurrentIsSecondTag = 1;  // Low bit of parent_: set when current_ is its parent's second child.
|
| + static const int kParentStackSize = 1024;  // Capacity of parents_; deeper levels park the link inside the cons strings.
|
| +
|
| + public:
|
| + enum StorageType {
|
| + kNone = 0,  // Set by set_end(): iteration is over.
|
| + kAscii = 1,  // Current piece holds 8-bit characters.
|
| + kTwoByte = 2  // Current piece holds 16-bit characters.
|
| + };
|
| + explicit ReadMemory(i::Handle<i::String> obj);
|
| + ~ReadMemory() {
|
| + if (ptr_ != NULL) {
|
| + rewind();  // Iteration stopped early: walk back up to depth 0.
|
| + }
|
| + }
|
| + const void* operator*() { return ptr_; }  // Current piece, or NULL after the last one.
|
| + int length() { return length_; }  // Length of the current piece, in characters.
|
| + StorageType storage_type() { return storage_type_; }
|
| + bool Next() {  // Advances to the next piece; returns false once there is none.
|
| + if (ptr_ != NULL) {
|
| + next();
|
| + }
|
| + return ptr_ != NULL;
|
| + }
|
| +
|
| + private:
|
| + void next();
|
| + void rewind();
|
| + void down();
|
| + void set_flat(v8::internal::String* flat);
|
| + void set_end();
|
| + void push_parent(bool second);
|
| + void pop_parent();
|
| +
|
| + const void* ptr_;  // Characters of the current flat piece; NULL at the end.
|
| + int length_;
|
| + StorageType storage_type_;
|
| + v8::internal::ConsString* current_;  // Cons string currently being traversed.
|
| + intptr_t parent_;  // Untagged address of current_'s parent, plus kCurrentIsSecondTag if applicable.
|
| + bool did_visit_second_;  // When true, next() moves up instead of visiting current_->second().
|
| + int depth_;  // Number of parents pushed so far.
|
| + intptr_t parents_[kParentStackSize];  // Saved parent_ values for the first kParentStackSize levels.
|
| +
|
| + // Disallow copying and assigning.
|
| + ReadMemory(const ReadMemory&);
|
| + void operator=(const ReadMemory&);
|
| +};
|
| +
|
| +
|
| +enum ParseMode {  // Selects what emit() and string_to_utf8() do with the output buffer.
|
| + kComputeLength,  // Store nothing; only advance the output position.
|
| + kCopyUnchecked,  // Store without testing the remaining space.
|
| + kCopyChecked  // Store only if the bytes fit before dest_end.
|
| +};
|
| +
|
| +
|
| +template <ParseMode mode, int count, typename T, typename C>  // Outputs the first |count| of c0..c3 at dest_pos.
|
| +static inline bool emit(T*& dest_pos, T const* dest_end, C c0, C c1 = 0, C c2 = 0, C c3 = 0) {
|
| + ASSERT(count >= 1 && count <= 4);
|
| + if (mode == kCopyChecked && dest_end - dest_pos < count) {
|
| + return false;  // Not enough room; nothing is written.
|
| + }
|
| + if (mode == kComputeLength) {
|
| + dest_pos += count;  // Only measure: advance without storing anything.
|
| + } else {
|
| + *(dest_pos++) = static_cast<T>(c0);
|
| + if (count >= 2) *(dest_pos++) = static_cast<T>(c1);
|
| + if (count >= 3) *(dest_pos++) = static_cast<T>(c2);
|
| + if (count >= 4) *(dest_pos++) = static_cast<T>(c3);
|
| + }
|
| + return true;  // dest_pos has been advanced by |count|.
|
| +}
|
| +
|
| +
|
| +template <ParseMode mode>  // UTF-8 encodes |value| piece by piece; the contract is described below EMIT.
|
| +static inline int string_to_utf8(i::Handle<i::String> value, char* dest, int dest_size, int* nchars) {
|
| +#define EMIT(n, ...) \
|
| + do { \
|
| + if (!emit<mode, n>(dest_pos, dest_end, __VA_ARGS__)) { goto out; } \
|
| + } while (0)
|
| + // Returns the number of UTF-8 bytes produced (in kComputeLength mode they
|
| + // are only counted) and stores the number of UTF-16 code units consumed in
|
| + // *nchars. kCopyChecked stops before writing past dest + dest_size.
|
| + char* dest_pos = dest;
|
| + char* dest_end = dest + dest_size;
|
| + uint16_t lead_surrogate = 0;  // Unpaired lead surrogate ending the previous piece.
|
| + *nchars = 0;
|
| + for(ReadMemory it(value); *it; it.Next()) {
|
| + switch (it.storage_type()) {
|
| + case ReadMemory::kAscii: {
|
| + // If the previous piece ended on an unpaired lead surrogate, emit the
|
| + // replacement character for it and reset.
|
| + if (lead_surrogate) {
|
| + // Done in every mode: in kComputeLength mode EMIT only advances
|
| + // dest_pos, which is how these three bytes get counted.
|
| + EMIT(3, 0xef, 0xbf, 0xbd);
|
| + lead_surrogate = 0;
|
| + }
|
| +
|
| + // Copy the ascii piece, truncating it to the remaining space if needed.
|
| + int tocopy = it.length();
|
| + if (mode == kCopyChecked && tocopy > (dest_end - dest_pos)) {
|
| + tocopy = dest_end - dest_pos;
|
| + if (tocopy == 0) {
|
| + goto out;
|
| + }
|
| + }
|
| + if (mode != kComputeLength) {
|
| + // Use a single memcpy() only if the number of bytes to copy exceeds 8 pointers.
|
| + if (tocopy > static_cast<int>(8 * sizeof(intptr_t))) {
|
| + memcpy(dest_pos, *it, tocopy);
|
| + dest_pos += tocopy;
|
| + } else {
|
| + const char* pos = reinterpret_cast<const char*>(*it);
|
| + const char* end = pos + tocopy;
|
| + for ( ; end - pos >= static_cast<int>(sizeof(intptr_t)); pos += sizeof(intptr_t)) {
|
| + memcpy(dest_pos, pos, sizeof(intptr_t));  // Word copy without alignment assumptions.
|
| + dest_pos += sizeof(intptr_t);
|
| + }
|
| + for ( ; pos < end; pos++) {
|
| + *(dest_pos++) = *pos;
|
| + }
|
| + }
|
| + } else {
|
| + dest_pos += tocopy;
|
| + }
|
| + *nchars += tocopy;
|
| + break;
|
| + }
|
| +
|
| + case ReadMemory::kTwoByte: {
|
| + const uint16_t* src = reinterpret_cast<const uint16_t*>(*it);
|
| + const uint16_t* src_pos = src;
|
| + const uint16_t* src_end = src + it.length();
|
| + // Check if we were left with a lead surrogate from another piece.
|
| + if (lead_surrogate && src_pos < src_end) {
|
| + // c completes the pair only if it is a trail (low) surrogate.
|
| + uint16_t c = *src_pos;
|
| + if (c >= 0xdc00 && c <= 0xdfff) {
|
| + uint32_t cp = 0x10000 + ((lead_surrogate - 0xd800) << 10) +
|
| + (c - 0xdc00);
|
| + ASSERT(cp >= 0x10000 && cp <= 0x10ffff);
|
| + EMIT(4,
|
| + 0xf0 | (cp >> 18), // & 0x07
|
| + 0x80 | ((cp >> 12) & 0x3f),
|
| + 0x80 | ((cp >> 6) & 0x3f),
|
| + 0x80 | (cp & 0x3f));
|
| + src_pos++;
|
| + ++*nchars;
|
| + } else {
|
| + // Not a trail surrogate: replace the dangling lead and let the
|
| + // loop below encode c itself.
|
| + EMIT(3, 0xef, 0xbf, 0xbd);
|
| + }
|
| + lead_surrogate = 0;
|
| + }
|
| + // *nchars is advanced per code unit so that it stays accurate when EMIT
|
| + // bails out to |out| in the middle of a piece.
|
| + for ( ; src_pos < src_end; src_pos++, ++*nchars) {
|
| + uint16_t c = *src_pos;
|
| + if (c < 0x80) {
|
| + EMIT(1, c);
|
| + } else if (c < 0x800) {
|
| + EMIT(2,
|
| + 0xc0 | (c >> 6), // & 0x1f
|
| + 0x80 | (c & 0x3f));
|
| + } else if (c < 0xd800 || c > 0xdfff) {
|
| + EMIT(3,
|
| + 0xe0 | (c >> 12), // & 0x0f
|
| + 0x80 | ((c >> 6) & 0x3f),
|
| + 0x80 | (c & 0x3f));
|
| + } else if (c <= 0xdbff) {
|
| + // Lead surrogate: try to grab the trail surrogate immediately, so the
|
| + // pending lead_surrogate test can stay outside of the loop.
|
| + if (src_pos + 1 < src_end) {
|
| + uint16_t c2 = *(src_pos + 1);
|
| + if (c2 >= 0xdc00 && c2 <= 0xdfff) {
|
| + // Lead surrogate followed by trail surrogate
|
| + uint32_t cp = 0x10000 + ((c - 0xd800) << 10) +
|
| + (c2 - 0xdc00);
|
| + ASSERT(cp >= 0x10000 && cp <= 0x10ffff);
|
| + EMIT(4,
|
| + 0xf0 | (cp >> 18), // & 0x07
|
| + 0x80 | ((cp >> 12) & 0x3f),
|
| + 0x80 | ((cp >> 6) & 0x3f),
|
| + 0x80 | (cp & 0x3f));
|
| + src_pos++;
|
| + ++*nchars;
|
| + } else {
|
| + // Invalid surrogate pair.
|
| + EMIT(3, 0xef, 0xbf, 0xbd);
|
| + }
|
| + } else {
|
| + lead_surrogate = c;
|
| + }
|
| + } else {
|
| + // Surrogate pair - unexpected trail
|
| + EMIT(3, 0xef, 0xbf, 0xbd);
|
| + }
|
| + }
|
| + break;
|
| + }
|
| +
|
| + default:
|
| + UNREACHABLE();
|
| + }
|
| + }
|
| + // Check if the last character parsed was a lead surrogate
|
| + if (lead_surrogate) {
|
| + EMIT(3, 0xef, 0xbf, 0xbd);
|
| + }
|
| +out:
|
| + return dest_pos - dest;
|
| +#undef EMIT
|
| +}
|
| +
|
| +
|
| int String::Utf8Length() const {
|
| i::Handle<i::String> str = Utils::OpenHandle(this);
|
| if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0;
|
| @@ -3703,6 +3939,7 @@ int String::WriteUtf8(char* buffer,
|
| LOG_API(isolate, "String::WriteUtf8");
|
| ENTER_V8(isolate);
|
| i::Handle<i::String> str = Utils::OpenHandle(this);
|
| +
|
| if (str->IsAsciiRepresentation()) {
|
| int len;
|
| if (capacity == -1) {
|
| @@ -3720,74 +3957,24 @@ int String::WriteUtf8(char* buffer,
|
| return len;
|
| }
|
|
|
| - i::StringInputBuffer& write_input_buffer = *isolate->write_input_buffer();
|
| - isolate->string_tracker()->RecordWrite(str);
|
| - if (options & HINT_MANY_WRITES_EXPECTED) {
|
| - // Flatten the string for efficiency. This applies whether we are
|
| - // using StringInputBuffer or Get(i) to access the characters.
|
| - FlattenString(str);
|
| - }
|
| - write_input_buffer.Reset(0, *str);
|
| - int len = str->length();
|
| - // Encode the first K - 3 bytes directly into the buffer since we
|
| - // know there's room for them. If no capacity is given we copy all
|
| - // of them here.
|
| - int fast_end = capacity - (unibrow::Utf8::kMaxEncodedSize - 1);
|
| - int i;
|
| - int pos = 0;
|
| - int nchars = 0;
|
| - int previous = unibrow::Utf16::kNoPreviousCharacter;
|
| - for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {
|
| - i::uc32 c = write_input_buffer.GetNext();
|
| - int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
|
| - pos += written;
|
| - nchars++;
|
| - previous = c;
|
| - }
|
| - if (i < len) {
|
| - // For the last characters we need to check the length for each one
|
| - // because they may be longer than the remaining space in the
|
| - // buffer.
|
| - char intermediate[unibrow::Utf8::kMaxEncodedSize];
|
| - for (; i < len && pos < capacity; i++) {
|
| - i::uc32 c = write_input_buffer.GetNext();
|
| - if (unibrow::Utf16::IsTrailSurrogate(c) &&
|
| - unibrow::Utf16::IsLeadSurrogate(previous)) {
|
| - // We can't use the intermediate buffer here because the encoding
|
| - // of surrogate pairs is done under assumption that you can step
|
| - // back and fix the UTF8 stream. Luckily we only need space for one
|
| - // more byte, so there is always space.
|
| - ASSERT(pos < capacity);
|
| - int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
|
| - ASSERT(written == 1);
|
| - pos += written;
|
| - nchars++;
|
| - } else {
|
| - int written =
|
| - unibrow::Utf8::Encode(intermediate,
|
| - c,
|
| - unibrow::Utf16::kNoPreviousCharacter);
|
| - if (pos + written <= capacity) {
|
| - for (int j = 0; j < written; j++)
|
| - buffer[pos + j] = intermediate[j];
|
| - pos += written;
|
| - nchars++;
|
| - } else {
|
| - // We've reached the end of the buffer
|
| - break;
|
| - }
|
| - }
|
| - previous = c;
|
| - }
|
| + int pos, nchars;
|
| + if (capacity == -1 || capacity >= str->length() * 3) {
|
| + pos = string_to_utf8<kCopyUnchecked>(str, buffer, -1, &nchars);
|
| + } else {
|
| + pos = string_to_utf8<kCopyChecked>(str, buffer, capacity, &nchars);
|
| }
|
| +
|
| if (nchars_ref != NULL) *nchars_ref = nchars;
|
| if (!(options & NO_NULL_TERMINATION) &&
|
| - (i == len && (capacity == -1 || pos < capacity)))
|
| + (nchars == str->length() && (capacity == -1 || pos < capacity)))
|
| buffer[pos++] = '\0';
|
| return pos;
|
| }
|
|
|
|
|
| +
|
| +
|
| +
|
| int String::WriteAscii(char* buffer,
|
| int start,
|
| int length,
|
| @@ -5312,6 +5499,189 @@ String::Value::~Value() {
|
| i::DeleteArray(str_);
|
| }
|
|
|
| +
|
| +ReadMemory::ReadMemory(i::Handle<i::String> str)  // Positions the iterator on the first flat piece of |str|.
|
| + : current_(NULL), parent_(0), depth_(0) {  // parent_ is read by the first push_parent(), so give it a defined value.
|
| + i::String* istr = *str;
|
| +
|
| + if (!i::StringShape(istr).IsCons()) {
|
| + // Fast case - a single flat piece, no need to iterate.
|
| + did_visit_second_ = true;
|
| + set_flat(istr);
|
| + } else {
|
| + current_ = i::ConsString::cast(istr);
|
| + down();
|
| + }
|
| +}
|
| +
|
| +
|
| +// MSVC decides not to inline some functions but forcing it to do so saves
|
| +// valuable cycles. Therefore I'm forcing inlining here - hopefully the v8
|
| +// team will not come and bomb my house.
|
| +// The strong_inline declaration should probably move to another file.
|
| +#if defined(_MSC_VER)
|
| +#define strong_inline __forceinline
|
| +#elif defined(__GNUC__)
|
| +#define strong_inline __attribute__((always_inline))
|
| +#else
|
| +#define strong_inline inline
|
| +#endif
|
| +
|
| +
|
| +strong_inline void ReadMemory::pop_parent() {  // Steps current_ up to the node encoded in parent_.
|
| + i::String* child = current_;  // The node being left.
|
| + if (!(parent_ & kCurrentIsSecondTag)) {
|
| + // Moving up on the left hand side: we came from the parent's first child.
|
| + current_ = reinterpret_cast<i::ConsString*>(parent_ + i::kHeapObjectTag);
|
| + did_visit_second_ = false;
|
| + } else {
|
| + // Moving up on the right hand side: we came from the parent's second child.
|
| + current_ = reinterpret_cast<i::ConsString*>(parent_ - kCurrentIsSecondTag +
|
| + i::kHeapObjectTag);
|
| + did_visit_second_ = true;
|
| + }
|
| + if (--depth_ < kParentStackSize) {  // The previous parent_ was saved in parents_.
|
| + parent_ = parents_[depth_];
|
| + } else if (!did_visit_second_) {  // It was parked in the first field; put the child back there.
|
| + parent_ = reinterpret_cast<intptr_t>(current_->unchecked_first());
|
| + current_->set_first(child, i::SKIP_WRITE_BARRIER);
|
| + } else {  // It was parked in the second field; put the child back there.
|
| + parent_ = reinterpret_cast<intptr_t>(current_->unchecked_second());
|
| + current_->set_second(child, i::SKIP_WRITE_BARRIER);
|
| + }
|
| +}
|
| +
|
| +
|
| +strong_inline void ReadMemory::push_parent(bool second) {  // Records current_ as the parent before descending into its first or second child.
|
| + if (second && depth_ == 0) {
|
| + // Optimization: no need to ever go back, next() ends the iteration at depth 0.
|
| + return;
|
| + }
|
| + if (depth_ < kParentStackSize) {
|
| + parents_[depth_] = parent_;  // Save the previous parent_ on the fixed-size stack.
|
| + } else if (!second) {  // Stack is full: park the previous parent_ in the field we descend through.
|
| + current_->set_first(reinterpret_cast<i::String*>(parent_),
|
| + i::SKIP_WRITE_BARRIER);
|
| + } else {
|
| + current_->set_second(reinterpret_cast<i::String*>(parent_),
|
| + i::SKIP_WRITE_BARRIER);
|
| + }
|
| + if (!second) {
|
| + parent_ = reinterpret_cast<intptr_t>(current_) - i::kHeapObjectTag;  // Untagged address, low bit clear.
|
| + } else {
|
| + parent_ = reinterpret_cast<intptr_t>(current_) - i::kHeapObjectTag +
|
| + kCurrentIsSecondTag;
|
| + }
|
| + depth_++;
|
| +}
|
| +
|
| +
|
| +void ReadMemory::rewind() {
|
| + // Iterate back to the root; pop_parent() restores any `first`/`second` field that was overwritten.
|
| + while (depth_ > 0) {
|
| + pop_parent();
|
| + }
|
| +}
|
| +
|
| +
|
| +inline void ReadMemory::down() {
|
| + // Iterate downward along `first` links until a non-cons string is reached.
|
| + i::String* child = current_->first();
|
| + while (i::StringShape(child).IsCons()) {
|
| + push_parent(false);  // Remember current_ before stepping into its first child.
|
| + current_ = i::ConsString::cast(child);
|
| + child = current_->first();
|
| + }
|
| + did_visit_second_ = false;  // The second child of current_ is still to be visited.
|
| + set_flat(child);
|
| +}
|
| +
|
| +
|
| +void ReadMemory::next() {
|
| + // Iterate upward until we reach a branch whose right hand side we didn't
|
| + // visit yet.
|
| + while (did_visit_second_) {
|
| + // When we reach the top, there is nothing left to visit: bail out.
|
| + if (depth_ == 0) {
|
| + set_end();
|
| + return;
|
| + }
|
| + pop_parent();
|
| + }
|
| +
|
| +
|
| + i::String* child = current_->second();
|
| + if (i::StringShape(child).IsCons()) {
|
| + push_parent(true);
|
| + current_ = i::ConsString::cast(child);
|
| + down();  // Continue with the leftmost flat piece of that subtree.
|
| + } else {
|
| + did_visit_second_ = true;
|
| + set_flat(child);  // The second child is itself the next piece.
|
| + }
|
| +}
|
| +
|
| +
|
| +strong_inline void ReadMemory::set_flat(i::String* string) {
|
| + // Sets ptr_, length_ and storage_type_ for |string|. Done by hand because String::GetFlatContent is not really inline-friendly.
|
| + i::StringShape shape(string);
|
| + if (shape.representation_tag() == i::kSlicedStringTag) {  // Slice: address the parent's characters at the slice offset.
|
| + i::SlicedString* slice = i::SlicedString::cast(string);
|
| + i::String* parent = slice->parent();
|
| + i::StringShape parent_shape(parent);
|
| + length_ = slice->length();
|
| + if (parent_shape.encoding_tag() == i::kAsciiStringTag) {
|
| + storage_type_ = kAscii;
|
| + if (parent_shape.representation_tag() == i::kSeqStringTag) {
|
| + ptr_ = i::SeqAsciiString::cast(parent)->GetChars() + slice->offset();
|
| + } else {
|
| + ASSERT(parent_shape.representation_tag() == i::kExternalStringTag);
|
| + ptr_ = i::ExternalAsciiString::cast(parent)->GetChars() +
|
| + slice->offset();
|
| + }
|
| + } else {
|
| + ASSERT(parent_shape.encoding_tag() == i::kTwoByteStringTag);
|
| + storage_type_ = kTwoByte;
|
| + if (parent_shape.representation_tag() == i::kSeqStringTag) {
|
| + ptr_ = i::SeqTwoByteString::cast(parent)->GetChars() + slice->offset();
|
| + } else {
|
| + ASSERT(parent_shape.representation_tag() == i::kExternalStringTag);
|
| + ptr_ = i::ExternalTwoByteString::cast(parent)->GetChars() +
|
| + slice->offset();
|
| + }
|
| + }
|
| + } else {  // Not a slice: the string is expected to be sequential or external.
|
| + length_ = string->length();
|
| + if (shape.encoding_tag() == i::kAsciiStringTag) {
|
| + storage_type_ = kAscii;
|
| + if (shape.representation_tag() == i::kSeqStringTag) {
|
| + ptr_ = i::SeqAsciiString::cast(string)->GetChars();
|
| + } else {
|
| + ASSERT(shape.representation_tag() == i::kExternalStringTag);
|
| + ptr_ = i::ExternalAsciiString::cast(string)->GetChars();
|
| + }
|
| + } else {
|
| + ASSERT(shape.encoding_tag() == i::kTwoByteStringTag);
|
| + storage_type_ = kTwoByte;
|
| + if (shape.representation_tag() == i::kSeqStringTag) {
|
| + ptr_ = i::SeqTwoByteString::cast(string)->GetChars();
|
| + } else {
|
| + ASSERT(shape.representation_tag() == i::kExternalStringTag);
|
| + ptr_ = i::ExternalTwoByteString::cast(string)->GetChars();
|
| + }
|
| + }
|
| + }
|
| +}
|
| +
|
| +
|
| +// Marks the iterator as exhausted: operator*() yields NULL from now on.
|
| +strong_inline void ReadMemory::set_end() {
|
| + ptr_ = NULL;
|
| + length_ = 0;
|
| + storage_type_ = kNone;
|
| +}
|
| +
|
| +
|
| Local<Value> Exception::RangeError(v8::Handle<v8::String> raw_message) {
|
| i::Isolate* isolate = i::Isolate::Current();
|
| LOG_API(isolate, "RangeError");
|
|
|