Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 3813 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3824 | 3824 |
| 3825 bool String::IsOneByte() const { | 3825 bool String::IsOneByte() const { |
| 3826 i::Handle<i::String> str = Utils::OpenHandle(this); | 3826 i::Handle<i::String> str = Utils::OpenHandle(this); |
| 3827 if (IsDeadCheck(str->GetIsolate(), "v8::String::IsOneByte()")) { | 3827 if (IsDeadCheck(str->GetIsolate(), "v8::String::IsOneByte()")) { |
| 3828 return false; | 3828 return false; |
| 3829 } | 3829 } |
| 3830 return str->IsOneByteConvertible(); | 3830 return str->IsOneByteConvertible(); |
| 3831 } | 3831 } |
| 3832 | 3832 |
| 3833 | 3833 |
| 3834 class Utf8LengthVisitor { | 3834 class Utf8LengthHelper : public i::AllStatic { |
| 3835 public: | 3835 public: |
| 3836 explicit Utf8LengthVisitor() | 3836 enum State { |
| 3837 : utf8_length_(0), | 3837 kEndsWithLeadingSurrogate = 1 << 0, |
| 3838 last_character_(unibrow::Utf16::kNoPreviousCharacter) {} | 3838 kStartsWithTrailingSurrogate = 1 << 1, |
| 3839 | 3839 kLeftmostEdgeIsCalculated = 1 << 2, |
| 3840 inline int GetLength() { | 3840 kRightmostEdgeIsCalculated = 1 << 3, |
| 3841 return utf8_length_; | 3841 kLeftmostEdgeIsSurrogate = 1 << 4, |
| 3842 } | 3842 kRightmostEdgeIsSurrogate = 1 << 5, |
| 3843 | 3843 }; |
| 3844 template<typename Char> | 3844 |
| 3845 inline void Visit(const Char* chars, unsigned length) { | 3845 static const uint8_t kInitialState = 0; |
| 3846 ASSERT(length > 0); | 3846 |
| 3847 // TODO(dcarney) Add back ascii fast path. | 3847 static inline bool EndsWithSurrogate(uint8_t state) { |
| 3848 int utf8_length = 0; | 3848 return state & kEndsWithLeadingSurrogate; |
| 3849 int last_character = last_character_; | 3849 } |
| 3850 for (unsigned i = 0; i < length; i++) { | 3850 |
| 3851 uint16_t c = chars[i]; | 3851 static inline bool StartsWithSurrogate(uint8_t state) { |
| 3852 utf8_length += unibrow::Utf8::Length(c, last_character); | 3852 return state & kStartsWithTrailingSurrogate; |
| 3853 last_character = c; | 3853 } |
| 3854 } | 3854 |
| 3855 last_character_ = last_character; | 3855 class Visitor { |
| 3856 utf8_length_ += utf8_length; | 3856 public: |
| 3857 } | 3857 explicit Visitor() |
| 3858 | 3858 : utf8_length_(0), |
| 3859 inline void VisitOneByteString(const uint8_t* chars, unsigned length) { | 3859 state_(kInitialState) {} |
| 3860 Visit(chars, length); | 3860 |
| 3861 } | 3861 template<typename Char> |
| 3862 | 3862 inline void Visit(const Char* chars, int length) { |
| 3863 inline void VisitTwoByteString(const uint16_t* chars, unsigned length) { | 3863 int utf8_length = 0; |
| 3864 Visit(chars, length); | 3864 int last_character = unibrow::Utf16::kNoPreviousCharacter; |
| 3865 for (int i = 0; i < length; i++) { | |
| 3866 uint16_t c = chars[i]; | |
| 3867 utf8_length += unibrow::Utf8::Length(c, last_character); | |
| 3868 if (sizeof(Char) > 1) { | |
| 3869 last_character = c; | |
| 3870 } | |
| 3871 } | |
| 3872 utf8_length_ = utf8_length; | |
| 3873 } | |
| 3874 | |
| 3875 void VisitOneByteString(const uint8_t* chars, int length) { | |
| 3876 Visit(chars, length); | |
| 3877 state_ = kInitialState; | |
| 3878 } | |
| 3879 | |
| 3880 void VisitTwoByteString(const uint16_t* chars, int length) { | |
| 3881 Visit(chars, length); | |
| 3882 uint8_t state = 0; | |
| 3883 if (unibrow::Utf16::IsTrailSurrogate(chars[0])) { | |
| 3884 state |= kStartsWithTrailingSurrogate; | |
| 3885 } | |
| 3886 if (unibrow::Utf16::IsLeadSurrogate(chars[length-1])) { | |
| 3887 state |= kEndsWithLeadingSurrogate; | |
| 3888 } | |
| 3889 state_ = state; | |
| 3890 } | |
| 3891 | |
| 3892 static i::ConsString* VisitFlat(i::String* string, | |
| 3893 int* length, | |
| 3894 uint8_t* state) { | |
| 3895 Visitor visitor; | |
| 3896 i::ConsString* cons_string = i::String::VisitFlat(&visitor, string); | |
| 3897 *length = visitor.utf8_length_; | |
| 3898 *state = visitor.state_; | |
| 3899 return cons_string; | |
| 3900 } | |
| 3901 | |
| 3902 private: | |
| 3903 int utf8_length_; | |
| 3904 uint8_t state_; | |
| 3905 DISALLOW_COPY_AND_ASSIGN(Visitor); | |
| 3906 }; | |
| 3907 | |
| 3908 static inline void MergeLeafLeft(int* length, | |
| 3909 uint8_t* state, | |
| 3910 uint8_t leaf_state) { | |
| 3911 bool edge_surrogate = StartsWithSurrogate(leaf_state); | |
| 3912 if (!(*state & kLeftmostEdgeIsCalculated)) { | |
| 3913 ASSERT(!(*state & kLeftmostEdgeIsSurrogate)); | |
| 3914 *state |= kLeftmostEdgeIsCalculated | |
| 3915 | (edge_surrogate ? kLeftmostEdgeIsSurrogate : 0); | |
| 3916 } else if (EndsWithSurrogate(*state) && edge_surrogate) { | |
| 3917 *length -= unibrow::Utf8::kBytesSavedByCombiningSurrogates; | |
| 3918 } | |
| 3919 if (EndsWithSurrogate(leaf_state)) { | |
|
Yang
2013/03/05 15:32:51
something to consider here:
http://graphics.stanfo
drcarney
2013/03/06 15:39:21
will handle in the next patch which is a performan
| |
| 3920 *state |= kEndsWithLeadingSurrogate; | |
| 3921 } else { | |
| 3922 *state &= ~kEndsWithLeadingSurrogate; | |
| 3923 } | |
| 3924 } | |
| 3925 | |
| 3926 static inline void MergeLeafRight(int* length, | |
| 3927 uint8_t* state, | |
| 3928 uint8_t leaf_state) { | |
| 3929 bool edge_surrogate = EndsWithSurrogate(leaf_state); | |
| 3930 if (!(*state & kRightmostEdgeIsCalculated)) { | |
| 3931 ASSERT(!(*state & kRightmostEdgeIsSurrogate)); | |
| 3932 *state |= (kRightmostEdgeIsCalculated | |
| 3933 | (edge_surrogate ? kRightmostEdgeIsSurrogate : 0)); | |
| 3934 } else if (edge_surrogate && StartsWithSurrogate(*state)) { | |
| 3935 *length -= unibrow::Utf8::kBytesSavedByCombiningSurrogates; | |
| 3936 } | |
| 3937 if (StartsWithSurrogate(leaf_state)) { | |
| 3938 *state |= kStartsWithTrailingSurrogate; | |
| 3939 } else { | |
| 3940 *state &= ~kStartsWithTrailingSurrogate; | |
| 3941 } | |
| 3942 } | |
| 3943 | |
| 3944 static inline void MergeTerminal(int* length, | |
| 3945 uint8_t state, | |
| 3946 uint8_t* state_out) { | |
| 3947 ASSERT((state & kLeftmostEdgeIsCalculated) && | |
| 3948 (state & kRightmostEdgeIsCalculated)); | |
| 3949 if (EndsWithSurrogate(state) && StartsWithSurrogate(state)) { | |
| 3950 *length -= unibrow::Utf8::kBytesSavedByCombiningSurrogates; | |
| 3951 } | |
| 3952 *state_out = kInitialState | | |
| 3953 (state & kLeftmostEdgeIsSurrogate ? kStartsWithTrailingSurrogate : 0) | | |
| 3954 (state & kRightmostEdgeIsSurrogate ? kEndsWithLeadingSurrogate : 0); | |
| 3955 } | |
| 3956 | |
| 3957 static int Calculate(i::ConsString* current, uint8_t* state_out) { | |
| 3958 using namespace internal; | |
| 3959 int total_length = 0; | |
| 3960 uint8_t state = kInitialState; | |
| 3961 while (true) { | |
| 3962 i::String* left = current->first(); | |
| 3963 i::String* right = current->second(); | |
| 3964 uint8_t right_leaf_state; | |
| 3965 uint8_t left_leaf_state; | |
| 3966 int leaf_length; | |
| 3967 ConsString* left_as_cons = | |
| 3968 Visitor::VisitFlat(left, &leaf_length, &left_leaf_state); | |
| 3969 if (left_as_cons == NULL) { | |
| 3970 total_length += leaf_length; | |
| 3971 MergeLeafLeft(&total_length, &state, left_leaf_state); | |
| 3972 } | |
| 3973 ConsString* right_as_cons = | |
| 3974 Visitor::VisitFlat(right, &leaf_length, &right_leaf_state); | |
| 3975 if (right_as_cons == NULL) { | |
| 3976 total_length += leaf_length; | |
| 3977 MergeLeafRight(&total_length, &state, right_leaf_state); | |
| 3978 // Terminal node. | |
| 3979 if (left_as_cons == NULL) { | |
| 3980 MergeTerminal(&total_length, state, state_out); | |
| 3981 return total_length; | |
| 3982 } | |
| 3983 } else if (left_as_cons != NULL) { | |
| 3984 // Both strings are ConsStrings. | |
| 3985 // Recurse on smallest. | |
| 3986 if (left->length() < right->length()) { | |
| 3987 total_length += Calculate(left_as_cons, &left_leaf_state); | |
| 3988 MergeLeafLeft(&total_length, &state, left_leaf_state); | |
| 3989 current = right_as_cons; | |
| 3990 continue; | |
| 3991 } else { | |
| 3992 total_length += Calculate(right_as_cons, &right_leaf_state); | |
| 3993 MergeLeafRight(&total_length, &state, right_leaf_state); | |
| 3994 current = left_as_cons; | |
| 3995 continue; | |
| 3996 } | |
| 3997 } | |
| 3998 // 1 leaf node. Do in place descent. | |
| 3999 if (left_as_cons != NULL) { | |
| 4000 current = left_as_cons; | |
| 4001 } else { | |
| 4002 ASSERT(right_as_cons != NULL); | |
| 4003 current = right_as_cons; | |
| 4004 } | |
| 4005 } | |
| 4006 UNREACHABLE(); | |
| 4007 return 0; | |
| 4008 } | |
| 4009 | |
| 4010 static inline int Calculate(i::ConsString* current) { | |
| 4011 uint8_t state = kInitialState; | |
| 4012 return Calculate(current, &state); | |
| 3865 } | 4013 } |
| 3866 | 4014 |
| 3867 private: | 4015 private: |
| 3868 int utf8_length_; | 4016 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8LengthHelper); |
| 3869 int last_character_; | |
| 3870 DISALLOW_COPY_AND_ASSIGN(Utf8LengthVisitor); | |
| 3871 }; | 4017 }; |
| 3872 | 4018 |
| 3873 | 4019 |
| 3874 static int Utf8Length(i::String* str, i::Isolate* isolate) { | 4020 static int Utf8Length(i::String* str, i::Isolate* isolate) { |
| 3875 unsigned length = static_cast<unsigned>(str->length()); | 4021 int length = str->length(); |
| 3876 if (length == 0) return 0; | 4022 if (length == 0) return 0; |
| 3877 int32_t type = str->map()->instance_type(); | 4023 uint8_t state; |
| 3878 Utf8LengthVisitor visitor; | 4024 i::ConsString* cons_string = |
| 3879 // Non ConsString branch. | 4025 Utf8LengthHelper::Visitor::VisitFlat(str, &length, &state); |
| 3880 if ((type & i::kStringRepresentationMask) != i::kConsStringTag) { | 4026 if (cons_string == NULL) return length; |
| 3881 i::ConsStringNullOp null_op; | 4027 return Utf8LengthHelper::Calculate(cons_string); |
| 3882 i::String::Visit(str, 0, visitor, null_op, type, length); | |
| 3883 return visitor.GetLength(); | |
| 3884 } | |
| 3885 i::ConsStringIteratorOp* op = isolate->write_iterator(); | |
| 3886 unsigned offset = 0; | |
| 3887 i::String* leaf = op->Operate(str, &offset, &type, &length); | |
| 3888 ASSERT(leaf != NULL); | |
| 3889 while (leaf != NULL) { | |
| 3890 i::ConsStringNullOp null_op; | |
| 3891 ASSERT(offset == 0); | |
| 3892 i::String::Visit(leaf, 0, visitor, null_op, type, length); | |
| 3893 leaf = op->ContinueOperation(&type, &length); | |
| 3894 } | |
| 3895 return visitor.GetLength(); | |
| 3896 } | 4028 } |
| 3897 | 4029 |
| 3898 | 4030 |
| 3899 int String::Utf8Length() const { | 4031 int String::Utf8Length() const { |
| 3900 i::Handle<i::String> str = Utils::OpenHandle(this); | 4032 i::Handle<i::String> str = Utils::OpenHandle(this); |
| 3901 i::Isolate* isolate = str->GetIsolate(); | 4033 i::Isolate* isolate = str->GetIsolate(); |
| 3902 if (IsDeadCheck(isolate, "v8::String::Utf8Length()")) return 0; | 4034 if (IsDeadCheck(isolate, "v8::String::Utf8Length()")) return 0; |
| 3903 return v8::Utf8Length(*str, isolate); | 4035 return v8::Utf8Length(*str, isolate); |
| 3904 } | 4036 } |
| 3905 | 4037 |
| 3906 | 4038 |
| 3907 class Utf8WriterVisitor { | 4039 class Utf8WriterVisitor { |
| 3908 public: | 4040 public: |
| 3909 Utf8WriterVisitor(char* buffer, int capacity) | 4041 Utf8WriterVisitor( |
| 4042 char* buffer, int capacity, bool skip_capacity_check) | |
| 3910 : early_termination_(false), | 4043 : early_termination_(false), |
| 3911 last_character_(unibrow::Utf16::kNoPreviousCharacter), | 4044 last_character_(unibrow::Utf16::kNoPreviousCharacter), |
| 3912 buffer_(buffer), | 4045 buffer_(buffer), |
| 3913 start_(buffer), | 4046 start_(buffer), |
| 3914 capacity_(capacity), | 4047 capacity_(capacity), |
| 4048 skip_capacity_check_(capacity == -1 || skip_capacity_check), | |
| 3915 utf16_chars_read_(0) { | 4049 utf16_chars_read_(0) { |
| 3916 } | 4050 } |
| 3917 | 4051 |
| 3918 static int WriteEndCharacter(uint16_t character, | 4052 static int WriteEndCharacter(uint16_t character, |
| 3919 int last_character, | 4053 int last_character, |
| 3920 int remaining, | 4054 int remaining, |
| 3921 char* const buffer) { | 4055 char* const buffer) { |
| 3922 using namespace unibrow; | 4056 using namespace unibrow; |
| 3923 ASSERT(remaining > 0); | 4057 ASSERT(remaining > 0); |
| 3924 // We can't use a local buffer here because Encode needs to modify | 4058 // We can't use a local buffer here because Encode needs to modify |
| 3925 // previous characters in the stream. We know, however, that | 4059 // previous characters in the stream. We know, however, that |
| 3926 // exactly one character will be advanced. | 4060 // exactly one character will be advanced. |
| 3927 if (Utf16::IsTrailSurrogate(character) && | 4061 if (Utf16::IsTrailSurrogate(character) && |
| 3928 Utf16::IsLeadSurrogate(last_character)) { | 4062 Utf16::IsLeadSurrogate(last_character)) { |
| 3929 int written = Utf8::Encode(buffer, character, last_character); | 4063 int written = Utf8::Encode(buffer, character, last_character); |
| 3930 ASSERT(written == 1); | 4064 ASSERT(written == 1); |
| 3931 return written; | 4065 return written; |
| 3932 } | 4066 } |
| 3933 // Use a scratch buffer to check the required characters. | 4067 // Use a scratch buffer to check the required characters. |
| 3934 char temp_buffer[Utf8::kMaxEncodedSize]; | 4068 char temp_buffer[Utf8::kMaxEncodedSize]; |
| 3935 // Can't encode using last_character as gcc has array bounds issues. | 4069 // Can't encode using last_character as gcc has array bounds issues. |
| 3936 int written = Utf8::Encode(temp_buffer, | 4070 int written = Utf8::Encode(temp_buffer, |
| 3937 character, | 4071 character, |
| 3938 unibrow::Utf16::kNoPreviousCharacter); | 4072 Utf16::kNoPreviousCharacter); |
| 3939 // Won't fit. | 4073 // Won't fit. |
| 3940 if (written > remaining) return 0; | 4074 if (written > remaining) return 0; |
| 3941 // Copy over the character from temp_buffer. | 4075 // Copy over the character from temp_buffer. |
| 3942 for (int j = 0; j < written; j++) { | 4076 for (int j = 0; j < written; j++) { |
| 3943 buffer[j] = temp_buffer[j]; | 4077 buffer[j] = temp_buffer[j]; |
| 3944 } | 4078 } |
| 3945 return written; | 4079 return written; |
| 3946 } | 4080 } |
| 3947 | 4081 |
| 3948 template<typename Char> | 4082 template<typename Char> |
| 3949 void Visit(const Char* chars, const int length) { | 4083 void Visit(const Char* chars, const int length) { |
| 3950 using namespace unibrow; | 4084 using namespace unibrow; |
| 3951 // TODO(dcarney): Add back ascii fast path. | |
| 3952 ASSERT(!early_termination_); | 4085 ASSERT(!early_termination_); |
| 3953 ASSERT(length > 0); | 4086 if (length == 0) return; |
| 3954 // Copy state to stack. | 4087 // Copy state to stack. |
| 3955 char* buffer = buffer_; | 4088 char* buffer = buffer_; |
| 3956 int last_character = last_character_; | 4089 int last_character = |
| 4090 sizeof(Char) == 1 ? Utf16::kNoPreviousCharacter : last_character_; | |
| 3957 int i = 0; | 4091 int i = 0; |
| 3958 // Do a fast loop where there is no exit capacity check. | 4092 // Do a fast loop where there is no exit capacity check. |
| 3959 while (true) { | 4093 while (true) { |
| 3960 int fast_length; | 4094 int fast_length; |
| 3961 if (capacity_ == -1) { | 4095 if (skip_capacity_check_) { |
| 3962 fast_length = length; | 4096 fast_length = length; |
| 3963 } else { | 4097 } else { |
| 3964 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); | 4098 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); |
| 3965 // Need enough space to write everything but one character. | 4099 // Need enough space to write everything but one character. |
| 3966 STATIC_ASSERT(Utf16::kMaxExtraUtf8BytesForOneUtf16CodeUnit == 3); | 4100 STATIC_ASSERT(Utf16::kMaxExtraUtf8BytesForOneUtf16CodeUnit == 3); |
| 3967 int writable_length = (remaining_capacity - 3)/3; | 4101 int max_size_per_char = sizeof(Char) == 1 ? 2 : 3; |
| 4102 int writable_length = | |
| 4103 (remaining_capacity - max_size_per_char)/max_size_per_char; | |
| 3968 // Need to drop into slow loop. | 4104 // Need to drop into slow loop. |
| 3969 if (writable_length <= 0) break; | 4105 if (writable_length <= 0) break; |
| 3970 fast_length = i + writable_length; | 4106 fast_length = i + writable_length; |
| 3971 if (fast_length > length) fast_length = length; | 4107 if (fast_length > length) fast_length = length; |
| 3972 } | 4108 } |
| 3973 // Write the characters to the stream. | 4109 // Write the characters to the stream. |
| 3974 for (; i < fast_length; i++) { | 4110 for (; i < fast_length; i++) { |
| 3975 uint16_t character = *chars++; | 4111 uint16_t character = *chars++; |
| 3976 buffer += Utf8::Encode(buffer, character, last_character); | 4112 if (sizeof(Char) == 1) { |
|
Yang
2013/03/05 15:32:51
It looks like this if should be hoisted to outside
drcarney
2013/03/06 15:39:21
done
| |
| 3977 last_character = character; | 4113 buffer += |
| 4114 Utf8::Encode(buffer, character, Utf16::kNoPreviousCharacter); | |
| 4115 } else { | |
| 4116 buffer += Utf8::Encode(buffer, character, last_character); | |
| 4117 last_character = character; | |
| 4118 } | |
| 3978 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); | 4119 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); |
| 3979 } | 4120 } |
| 3980 // Array is fully written. Exit. | 4121 // Array is fully written. Exit. |
| 3981 if (fast_length == length) { | 4122 if (fast_length == length) { |
| 3982 // Write state back out to object. | 4123 // Write state back out to object. |
| 3983 last_character_ = last_character; | 4124 last_character_ = last_character; |
| 3984 buffer_ = buffer; | 4125 buffer_ = buffer; |
| 3985 utf16_chars_read_ += i; | 4126 utf16_chars_read_ += length; |
| 3986 return; | 4127 return; |
| 3987 } | 4128 } |
| 3988 } | 4129 } |
| 3989 ASSERT(capacity_ != -1); | 4130 ASSERT(!skip_capacity_check_); |
| 3990 // Slow loop. Must check capacity on each iteration. | 4131 // Slow loop. Must check capacity on each iteration. |
| 3991 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); | 4132 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); |
| 3992 ASSERT(remaining_capacity >= 0); | 4133 ASSERT(remaining_capacity >= 0); |
| 3993 for (; i < length && remaining_capacity > 0; i++) { | 4134 for (; i < length && remaining_capacity > 0; i++) { |
| 3994 uint16_t character = *chars++; | 4135 uint16_t character = *chars++; |
| 3995 int written = WriteEndCharacter(character, | 4136 int written = WriteEndCharacter(character, |
| 3996 last_character, | 4137 last_character, |
| 3997 remaining_capacity, | 4138 remaining_capacity, |
| 3998 buffer); | 4139 buffer); |
| 3999 if (written == 0) { | 4140 if (written == 0) { |
| 4000 early_termination_ = true; | 4141 early_termination_ = true; |
| 4001 break; | 4142 break; |
| 4002 } | 4143 } |
| 4003 buffer += written; | 4144 buffer += written; |
| 4004 remaining_capacity -= written; | 4145 remaining_capacity -= written; |
| 4005 last_character = character; | 4146 last_character = character; |
| 4006 } | 4147 } |
| 4007 // Write state back out to object. | 4148 // Write state back out to object. |
| 4008 last_character_ = last_character; | 4149 last_character_ = last_character; |
| 4009 buffer_ = buffer; | 4150 buffer_ = buffer; |
| 4010 utf16_chars_read_ += i; | 4151 utf16_chars_read_ += i; |
| 4011 } | 4152 } |
| 4012 | 4153 |
| 4013 inline bool IsDone() { | 4154 inline bool IsDone() { |
| 4014 return early_termination_; | 4155 return early_termination_; |
| 4015 } | 4156 } |
| 4016 | 4157 |
| 4017 inline void VisitOneByteString(const uint8_t* chars, unsigned length) { | 4158 inline void VisitOneByteString(const uint8_t* chars, int length) { |
| 4018 Visit(chars, static_cast<int>(length)); | 4159 Visit(chars, length); |
| 4019 } | 4160 } |
| 4020 | 4161 |
| 4021 inline void VisitTwoByteString(const uint16_t* chars, unsigned length) { | 4162 inline void VisitTwoByteString(const uint16_t* chars, int length) { |
| 4022 Visit(chars, static_cast<int>(length)); | 4163 Visit(chars, length); |
| 4023 } | 4164 } |
| 4024 | 4165 |
| 4025 inline int CompleteWrite(bool write_null, int* utf16_chars_read_out) { | 4166 int CompleteWrite(bool write_null, int* utf16_chars_read_out) { |
| 4026 // Write out number of utf16 characters written to the stream. | 4167 // Write out number of utf16 characters written to the stream. |
| 4027 if (utf16_chars_read_out != NULL) { | 4168 if (utf16_chars_read_out != NULL) { |
| 4028 *utf16_chars_read_out = utf16_chars_read_; | 4169 *utf16_chars_read_out = utf16_chars_read_; |
| 4029 } | 4170 } |
| 4030 // Only null terminate if all of the string was written and there's space. | 4171 // Only null terminate if all of the string was written and there's space. |
| 4031 if (write_null && | 4172 if (write_null && |
| 4032 !early_termination_ && | 4173 !early_termination_ && |
| 4033 (capacity_ == -1 || (buffer_ - start_) < capacity_)) { | 4174 (capacity_ == -1 || (buffer_ - start_) < capacity_)) { |
| 4034 *buffer_++ = '\0'; | 4175 *buffer_++ = '\0'; |
| 4035 } | 4176 } |
| 4036 return static_cast<int>(buffer_ - start_); | 4177 return static_cast<int>(buffer_ - start_); |
| 4037 } | 4178 } |
| 4038 | 4179 |
| 4039 private: | 4180 private: |
| 4040 bool early_termination_; | 4181 bool early_termination_; |
| 4041 int last_character_; | 4182 int last_character_; |
| 4042 char* buffer_; | 4183 char* buffer_; |
| 4043 char* const start_; | 4184 char* const start_; |
| 4044 int capacity_; | 4185 int capacity_; |
| 4186 bool const skip_capacity_check_; | |
| 4045 int utf16_chars_read_; | 4187 int utf16_chars_read_; |
| 4046 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor); | 4188 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor); |
| 4047 }; | 4189 }; |
| 4048 | 4190 |
| 4049 | 4191 |
| 4192 static bool RecursivelySerializeToUtf8(i::String* current, | |
| 4193 Utf8WriterVisitor* writer, | |
| 4194 int recursion_budget) { | |
| 4195 while (!writer->IsDone()) { | |
| 4196 i::ConsString* cons_string = i::String::VisitFlat(writer, current); | |
| 4197 if (cons_string == NULL) return true; // Leaf node. | |
| 4198 if (recursion_budget <= 0) return false; | |
| 4199 // Must write the left branch first. | |
| 4200 i::String* first = cons_string->first(); | |
| 4201 bool success = RecursivelySerializeToUtf8(first, | |
| 4202 writer, | |
| 4203 recursion_budget - 1); | |
| 4204 if (!success) return false; | |
| 4205 // Inline tail recurse for right branch. | |
| 4206 current = cons_string->second(); | |
| 4207 } | |
| 4208 return true; | |
| 4209 } | |
| 4210 | |
| 4211 | |
| 4050 int String::WriteUtf8(char* buffer, | 4212 int String::WriteUtf8(char* buffer, |
| 4051 int capacity, | 4213 int capacity, |
| 4052 int* nchars_ref, | 4214 int* nchars_ref, |
| 4053 int options) const { | 4215 int options) const { |
| 4054 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); | 4216 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); |
| 4055 if (IsDeadCheck(isolate, "v8::String::WriteUtf8()")) return 0; | 4217 if (IsDeadCheck(isolate, "v8::String::WriteUtf8()")) return 0; |
| 4056 LOG_API(isolate, "String::WriteUtf8"); | 4218 LOG_API(isolate, "String::WriteUtf8"); |
| 4057 ENTER_V8(isolate); | 4219 ENTER_V8(isolate); |
| 4058 i::Handle<i::String> str = Utils::OpenHandle(this); | 4220 i::Handle<i::String> str = Utils::OpenHandle(this); |
| 4059 if (options & HINT_MANY_WRITES_EXPECTED) { | 4221 if (options & HINT_MANY_WRITES_EXPECTED) { |
| 4060 FlattenString(str); // Flatten the string for efficiency. | 4222 FlattenString(str); // Flatten the string for efficiency. |
| 4061 } | 4223 } |
| 4062 Utf8WriterVisitor writer(buffer, capacity); | 4224 const int string_length = str->length(); |
| 4063 i::ConsStringIteratorOp* op = isolate->write_iterator(); | 4225 bool write_null = !(options & NO_NULL_TERMINATION); |
| 4064 op->Reset(); | 4226 // First check if we can just write the string without checking capacity. |
| 4065 int32_t type = str->map()->instance_type(); | 4227 if (capacity == -1 || capacity / 3 >= string_length) { |
| 4066 unsigned str_length = static_cast<unsigned>(str->length()); | 4228 Utf8WriterVisitor writer(buffer, capacity, true); |
| 4067 if (str_length != 0) { | 4229 const int kMaxRecursion = 100; |
| 4068 i::String::Visit(*str, 0, writer, *op, type, str_length); | 4230 bool success = RecursivelySerializeToUtf8(*str, &writer, kMaxRecursion); |
| 4069 while (!writer.IsDone()) { | 4231 if (success) return writer.CompleteWrite(write_null, nchars_ref); |
| 4070 unsigned length_out; | 4232 } else if (capacity >= string_length) { |
| 4071 i::String* next = op->ContinueOperation(&type, &length_out); | 4233 // First check that the buffer is large enough. |
| 4072 if (next == NULL) break; | 4234 int utf8_bytes = v8::Utf8Length(*str, str->GetIsolate()); |
| 4073 // TODO(dcarney): need an asserting null op. | 4235 if (utf8_bytes <= capacity) { |
| 4074 i::ConsStringNullOp null_op; | 4236 // ASCII fast path. |
| 4075 i::String::Visit(next, 0, writer, null_op, type, length_out); | 4237 if (utf8_bytes == string_length) { |
| 4238 WriteOneByte(reinterpret_cast<uint8_t*>(buffer), 0, capacity, options); | |
| 4239 if (nchars_ref != NULL) *nchars_ref = string_length; | |
| 4240 if (write_null && (utf8_bytes+1 <= capacity)) { | |
| 4241 return string_length + 1; | |
| 4242 } | |
| 4243 return string_length; | |
| 4244 } | |
| 4245 if (write_null && (utf8_bytes+1 > capacity)) { | |
| 4246 options |= NO_NULL_TERMINATION; | |
| 4247 } | |
| 4248 // Recurse once without a capacity limit. | |
| 4249 // This will get into the first branch above. | |
| 4250 // TODO(dcarney) Check max left rec. in Utf8Length and fall through. | |
| 4251 return WriteUtf8(buffer, -1, nchars_ref, options); | |
| 4076 } | 4252 } |
| 4077 } | 4253 } |
| 4078 return writer.CompleteWrite(!(options & NO_NULL_TERMINATION), nchars_ref); | 4254 // Recursive slow path can potentially be unreasonably slow. Flatten. |
| 4255 str = FlattenGetString(str); | |
| 4256 Utf8WriterVisitor writer(buffer, capacity, false); | |
| 4257 i::String::VisitFlat(&writer, *str); | |
| 4258 return writer.CompleteWrite(write_null, nchars_ref); | |
| 4079 } | 4259 } |
| 4080 | 4260 |
| 4081 | 4261 |
| 4082 int String::WriteAscii(char* buffer, | 4262 int String::WriteAscii(char* buffer, |
| 4083 int start, | 4263 int start, |
| 4084 int length, | 4264 int length, |
| 4085 int options) const { | 4265 int options) const { |
| 4086 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); | 4266 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); |
| 4087 if (IsDeadCheck(isolate, "v8::String::WriteAscii()")) return 0; | 4267 if (IsDeadCheck(isolate, "v8::String::WriteAscii()")) return 0; |
| 4088 LOG_API(isolate, "String::WriteAscii"); | 4268 LOG_API(isolate, "String::WriteAscii"); |
| (...skipping 2635 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6724 | 6904 |
| 6725 v->VisitPointers(blocks_.first(), first_block_limit_); | 6905 v->VisitPointers(blocks_.first(), first_block_limit_); |
| 6726 | 6906 |
| 6727 for (int i = 1; i < blocks_.length(); i++) { | 6907 for (int i = 1; i < blocks_.length(); i++) { |
| 6728 v->VisitPointers(blocks_[i], &blocks_[i][kHandleBlockSize]); | 6908 v->VisitPointers(blocks_[i], &blocks_[i][kHandleBlockSize]); |
| 6729 } | 6909 } |
| 6730 } | 6910 } |
| 6731 | 6911 |
| 6732 | 6912 |
| 6733 } } // namespace v8::internal | 6913 } } // namespace v8::internal |
| OLD | NEW |