OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 3813 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3824 | 3824 |
3825 bool String::IsOneByte() const { | 3825 bool String::IsOneByte() const { |
3826 i::Handle<i::String> str = Utils::OpenHandle(this); | 3826 i::Handle<i::String> str = Utils::OpenHandle(this); |
3827 if (IsDeadCheck(str->GetIsolate(), "v8::String::IsOneByte()")) { | 3827 if (IsDeadCheck(str->GetIsolate(), "v8::String::IsOneByte()")) { |
3828 return false; | 3828 return false; |
3829 } | 3829 } |
3830 return str->IsOneByteConvertible(); | 3830 return str->IsOneByteConvertible(); |
3831 } | 3831 } |
3832 | 3832 |
3833 | 3833 |
3834 class Utf8LengthVisitor { | 3834 class Utf8LengthHelper : public i::AllStatic { |
3835 public: | 3835 public: |
3836 explicit Utf8LengthVisitor() | 3836 enum State { |
3837 : utf8_length_(0), | 3837 kEndsWithLeadingSurrogate = 1 << 0, |
3838 last_character_(unibrow::Utf16::kNoPreviousCharacter) {} | 3838 kStartsWithTrailingSurrogate = 1 << 1, |
3839 | 3839 kLeftmostEdgeIsCalculated = 1 << 2, |
3840 inline int GetLength() { | 3840 kRightmostEdgeIsCalculated = 1 << 3, |
3841 return utf8_length_; | 3841 kLeftmostEdgeIsSurrogate = 1 << 4, |
3842 } | 3842 kRightmostEdgeIsSurrogate = 1 << 5 |
3843 | 3843 }; |
3844 template<typename Char> | 3844 |
3845 inline void Visit(const Char* chars, unsigned length) { | 3845 static const uint8_t kInitialState = 0; |
3846 ASSERT(length > 0); | 3846 |
3847 // TODO(dcarney) Add back ascii fast path. | 3847 static inline bool EndsWithSurrogate(uint8_t state) { |
3848 int utf8_length = 0; | 3848 return state & kEndsWithLeadingSurrogate; |
3849 int last_character = last_character_; | 3849 } |
3850 for (unsigned i = 0; i < length; i++) { | 3850 |
3851 uint16_t c = chars[i]; | 3851 static inline bool StartsWithSurrogate(uint8_t state) { |
3852 utf8_length += unibrow::Utf8::Length(c, last_character); | 3852 return state & kStartsWithTrailingSurrogate; |
3853 last_character = c; | 3853 } |
3854 } | 3854 |
3855 last_character_ = last_character; | 3855 class Visitor { |
3856 utf8_length_ += utf8_length; | 3856 public: |
3857 } | 3857 explicit Visitor() |
3858 | 3858 : utf8_length_(0), |
3859 inline void VisitOneByteString(const uint8_t* chars, unsigned length) { | 3859 state_(kInitialState) {} |
3860 Visit(chars, length); | 3860 |
3861 } | 3861 template<typename Char> |
3862 | 3862 inline void Visit(const Char* chars, int length) { |
3863 inline void VisitTwoByteString(const uint16_t* chars, unsigned length) { | 3863 int utf8_length = 0; |
3864 Visit(chars, length); | 3864 int last_character = unibrow::Utf16::kNoPreviousCharacter; |
Erik Corry
2013/03/07 09:38:37
I wonder if it's worth modifying this to take adva
| |
3865 for (int i = 0; i < length; i++) { | |
3866 uint16_t c = chars[i]; | |
3867 utf8_length += unibrow::Utf8::Length(c, last_character); | |
3868 if (sizeof(Char) > 1) { | |
3869 last_character = c; | |
3870 } | |
3871 } | |
3872 utf8_length_ = utf8_length; | |
3873 } | |
3874 | |
3875 void VisitOneByteString(const uint8_t* chars, int length) { | |
3876 Visit(chars, length); | |
3877 state_ = kInitialState; | |
3878 } | |
3879 | |
3880 void VisitTwoByteString(const uint16_t* chars, int length) { | |
3881 Visit(chars, length); | |
3882 uint8_t state = 0; | |
3883 if (unibrow::Utf16::IsTrailSurrogate(chars[0])) { | |
3884 state |= kStartsWithTrailingSurrogate; | |
3885 } | |
3886 if (unibrow::Utf16::IsLeadSurrogate(chars[length-1])) { | |
3887 state |= kEndsWithLeadingSurrogate; | |
3888 } | |
3889 state_ = state; | |
3890 } | |
3891 | |
3892 static i::ConsString* VisitFlat(i::String* string, | |
3893 int* length, | |
3894 uint8_t* state) { | |
3895 Visitor visitor; | |
3896 i::ConsString* cons_string = i::String::VisitFlat(&visitor, string); | |
3897 *length = visitor.utf8_length_; | |
3898 *state = visitor.state_; | |
3899 return cons_string; | |
3900 } | |
3901 | |
3902 private: | |
3903 int utf8_length_; | |
3904 uint8_t state_; | |
3905 DISALLOW_COPY_AND_ASSIGN(Visitor); | |
3906 }; | |
3907 | |
3908 static inline void MergeLeafLeft(int* length, | |
3909 uint8_t* state, | |
3910 uint8_t leaf_state) { | |
3911 bool edge_surrogate = StartsWithSurrogate(leaf_state); | |
3912 if (!(*state & kLeftmostEdgeIsCalculated)) { | |
3913 ASSERT(!(*state & kLeftmostEdgeIsSurrogate)); | |
3914 *state |= kLeftmostEdgeIsCalculated | |
3915 | (edge_surrogate ? kLeftmostEdgeIsSurrogate : 0); | |
3916 } else if (EndsWithSurrogate(*state) && edge_surrogate) { | |
3917 *length -= unibrow::Utf8::kBytesSavedByCombiningSurrogates; | |
3918 } | |
3919 if (EndsWithSurrogate(leaf_state)) { | |
3920 *state |= kEndsWithLeadingSurrogate; | |
3921 } else { | |
3922 *state &= ~kEndsWithLeadingSurrogate; | |
3923 } | |
3924 } | |
3925 | |
3926 static inline void MergeLeafRight(int* length, | |
3927 uint8_t* state, | |
3928 uint8_t leaf_state) { | |
3929 bool edge_surrogate = EndsWithSurrogate(leaf_state); | |
3930 if (!(*state & kRightmostEdgeIsCalculated)) { | |
3931 ASSERT(!(*state & kRightmostEdgeIsSurrogate)); | |
3932 *state |= (kRightmostEdgeIsCalculated | |
3933 | (edge_surrogate ? kRightmostEdgeIsSurrogate : 0)); | |
3934 } else if (edge_surrogate && StartsWithSurrogate(*state)) { | |
3935 *length -= unibrow::Utf8::kBytesSavedByCombiningSurrogates; | |
3936 } | |
3937 if (StartsWithSurrogate(leaf_state)) { | |
3938 *state |= kStartsWithTrailingSurrogate; | |
3939 } else { | |
3940 *state &= ~kStartsWithTrailingSurrogate; | |
3941 } | |
3942 } | |
3943 | |
3944 static inline void MergeTerminal(int* length, | |
3945 uint8_t state, | |
3946 uint8_t* state_out) { | |
3947 ASSERT((state & kLeftmostEdgeIsCalculated) && | |
3948 (state & kRightmostEdgeIsCalculated)); | |
3949 if (EndsWithSurrogate(state) && StartsWithSurrogate(state)) { | |
3950 *length -= unibrow::Utf8::kBytesSavedByCombiningSurrogates; | |
3951 } | |
3952 *state_out = kInitialState | | |
3953 (state & kLeftmostEdgeIsSurrogate ? kStartsWithTrailingSurrogate : 0) | | |
3954 (state & kRightmostEdgeIsSurrogate ? kEndsWithLeadingSurrogate : 0); | |
3955 } | |
3956 | |
3957 static int Calculate(i::ConsString* current, uint8_t* state_out) { | |
3958 using namespace internal; | |
3959 int total_length = 0; | |
3960 uint8_t state = kInitialState; | |
3961 while (true) { | |
3962 i::String* left = current->first(); | |
3963 i::String* right = current->second(); | |
3964 uint8_t right_leaf_state; | |
3965 uint8_t left_leaf_state; | |
3966 int leaf_length; | |
3967 ConsString* left_as_cons = | |
3968 Visitor::VisitFlat(left, &leaf_length, &left_leaf_state); | |
3969 if (left_as_cons == NULL) { | |
3970 total_length += leaf_length; | |
3971 MergeLeafLeft(&total_length, &state, left_leaf_state); | |
3972 } | |
3973 ConsString* right_as_cons = | |
3974 Visitor::VisitFlat(right, &leaf_length, &right_leaf_state); | |
3975 if (right_as_cons == NULL) { | |
3976 total_length += leaf_length; | |
3977 MergeLeafRight(&total_length, &state, right_leaf_state); | |
3978 // Terminal node. | |
3979 if (left_as_cons == NULL) { | |
3980 MergeTerminal(&total_length, state, state_out); | |
3981 return total_length; | |
3982 } | |
3983 } else if (left_as_cons != NULL) { | |
3984 // Both strings are ConsStrings. | |
3985 // Recurse on smallest. | |
3986 if (left->length() < right->length()) { | |
3987 total_length += Calculate(left_as_cons, &left_leaf_state); | |
3988 MergeLeafLeft(&total_length, &state, left_leaf_state); | |
3989 current = right_as_cons; | |
3990 continue; | |
3991 } else { | |
3992 total_length += Calculate(right_as_cons, &right_leaf_state); | |
3993 MergeLeafRight(&total_length, &state, right_leaf_state); | |
3994 current = left_as_cons; | |
3995 continue; | |
3996 } | |
3997 } | |
3998 // 1 leaf node. Do in place descent. | |
3999 if (left_as_cons != NULL) { | |
4000 current = left_as_cons; | |
4001 } else { | |
4002 ASSERT(right_as_cons != NULL); | |
4003 current = right_as_cons; | |
4004 } | |
4005 } | |
4006 UNREACHABLE(); | |
4007 return 0; | |
4008 } | |
4009 | |
4010 static inline int Calculate(i::ConsString* current) { | |
4011 uint8_t state = kInitialState; | |
4012 return Calculate(current, &state); | |
3865 } | 4013 } |
3866 | 4014 |
3867 private: | 4015 private: |
3868 int utf8_length_; | 4016 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8LengthHelper); |
3869 int last_character_; | |
3870 DISALLOW_COPY_AND_ASSIGN(Utf8LengthVisitor); | |
3871 }; | 4017 }; |
3872 | 4018 |
3873 | 4019 |
3874 static int Utf8Length(i::String* str, i::Isolate* isolate) { | 4020 static int Utf8Length(i::String* str, i::Isolate* isolate) { |
3875 unsigned length = static_cast<unsigned>(str->length()); | 4021 int length = str->length(); |
3876 if (length == 0) return 0; | 4022 if (length == 0) return 0; |
3877 int32_t type = str->map()->instance_type(); | 4023 uint8_t state; |
3878 Utf8LengthVisitor visitor; | 4024 i::ConsString* cons_string = |
3879 // Non ConsString branch. | 4025 Utf8LengthHelper::Visitor::VisitFlat(str, &length, &state); |
3880 if ((type & i::kStringRepresentationMask) != i::kConsStringTag) { | 4026 if (cons_string == NULL) return length; |
3881 i::ConsStringNullOp null_op; | 4027 return Utf8LengthHelper::Calculate(cons_string); |
3882 i::String::Visit(str, 0, visitor, null_op, type, length); | |
3883 return visitor.GetLength(); | |
3884 } | |
3885 i::ConsStringIteratorOp* op = isolate->write_iterator(); | |
3886 unsigned offset = 0; | |
3887 i::String* leaf = op->Operate(str, &offset, &type, &length); | |
3888 ASSERT(leaf != NULL); | |
3889 while (leaf != NULL) { | |
3890 i::ConsStringNullOp null_op; | |
3891 ASSERT(offset == 0); | |
3892 i::String::Visit(leaf, 0, visitor, null_op, type, length); | |
3893 leaf = op->ContinueOperation(&type, &length); | |
3894 } | |
3895 return visitor.GetLength(); | |
3896 } | 4028 } |
3897 | 4029 |
3898 | 4030 |
3899 int String::Utf8Length() const { | 4031 int String::Utf8Length() const { |
3900 i::Handle<i::String> str = Utils::OpenHandle(this); | 4032 i::Handle<i::String> str = Utils::OpenHandle(this); |
3901 i::Isolate* isolate = str->GetIsolate(); | 4033 i::Isolate* isolate = str->GetIsolate(); |
3902 if (IsDeadCheck(isolate, "v8::String::Utf8Length()")) return 0; | 4034 if (IsDeadCheck(isolate, "v8::String::Utf8Length()")) return 0; |
3903 return v8::Utf8Length(*str, isolate); | 4035 return v8::Utf8Length(*str, isolate); |
3904 } | 4036 } |
3905 | 4037 |
3906 | 4038 |
3907 class Utf8WriterVisitor { | 4039 class Utf8WriterVisitor { |
3908 public: | 4040 public: |
3909 Utf8WriterVisitor(char* buffer, int capacity) | 4041 Utf8WriterVisitor( |
4042 char* buffer, int capacity, bool skip_capacity_check) | |
3910 : early_termination_(false), | 4043 : early_termination_(false), |
3911 last_character_(unibrow::Utf16::kNoPreviousCharacter), | 4044 last_character_(unibrow::Utf16::kNoPreviousCharacter), |
3912 buffer_(buffer), | 4045 buffer_(buffer), |
3913 start_(buffer), | 4046 start_(buffer), |
3914 capacity_(capacity), | 4047 capacity_(capacity), |
4048 skip_capacity_check_(capacity == -1 || skip_capacity_check), | |
3915 utf16_chars_read_(0) { | 4049 utf16_chars_read_(0) { |
3916 } | 4050 } |
3917 | 4051 |
3918 static int WriteEndCharacter(uint16_t character, | 4052 static int WriteEndCharacter(uint16_t character, |
3919 int last_character, | 4053 int last_character, |
3920 int remaining, | 4054 int remaining, |
3921 char* const buffer) { | 4055 char* const buffer) { |
3922 using namespace unibrow; | 4056 using namespace unibrow; |
3923 ASSERT(remaining > 0); | 4057 ASSERT(remaining > 0); |
3924 // We can't use a local buffer here because Encode needs to modify | 4058 // We can't use a local buffer here because Encode needs to modify |
3925 // previous characters in the stream. We know, however, that | 4059 // previous characters in the stream. We know, however, that |
3926 // exactly one character will be advanced. | 4060 // exactly one character will be advanced. |
3927 if (Utf16::IsTrailSurrogate(character) && | 4061 if (Utf16::IsTrailSurrogate(character) && |
3928 Utf16::IsLeadSurrogate(last_character)) { | 4062 Utf16::IsLeadSurrogate(last_character)) { |
3929 int written = Utf8::Encode(buffer, character, last_character); | 4063 int written = Utf8::Encode(buffer, character, last_character); |
3930 ASSERT(written == 1); | 4064 ASSERT(written == 1); |
3931 return written; | 4065 return written; |
3932 } | 4066 } |
3933 // Use a scratch buffer to check the required characters. | 4067 // Use a scratch buffer to check the required characters. |
3934 char temp_buffer[Utf8::kMaxEncodedSize]; | 4068 char temp_buffer[Utf8::kMaxEncodedSize]; |
3935 // Can't encode using last_character as gcc has array bounds issues. | 4069 // Can't encode using last_character as gcc has array bounds issues. |
3936 int written = Utf8::Encode(temp_buffer, | 4070 int written = Utf8::Encode(temp_buffer, |
3937 character, | 4071 character, |
3938 unibrow::Utf16::kNoPreviousCharacter); | 4072 Utf16::kNoPreviousCharacter); |
3939 // Won't fit. | 4073 // Won't fit. |
3940 if (written > remaining) return 0; | 4074 if (written > remaining) return 0; |
3941 // Copy over the character from temp_buffer. | 4075 // Copy over the character from temp_buffer. |
3942 for (int j = 0; j < written; j++) { | 4076 for (int j = 0; j < written; j++) { |
3943 buffer[j] = temp_buffer[j]; | 4077 buffer[j] = temp_buffer[j]; |
3944 } | 4078 } |
3945 return written; | 4079 return written; |
3946 } | 4080 } |
3947 | 4081 |
3948 template<typename Char> | 4082 template<typename Char> |
3949 void Visit(const Char* chars, const int length) { | 4083 void Visit(const Char* chars, const int length) { |
3950 using namespace unibrow; | 4084 using namespace unibrow; |
3951 // TODO(dcarney): Add back ascii fast path. | |
3952 ASSERT(!early_termination_); | 4085 ASSERT(!early_termination_); |
3953 ASSERT(length > 0); | 4086 if (length == 0) return; |
3954 // Copy state to stack. | 4087 // Copy state to stack. |
3955 char* buffer = buffer_; | 4088 char* buffer = buffer_; |
3956 int last_character = last_character_; | 4089 int last_character = |
4090 sizeof(Char) == 1 ? Utf16::kNoPreviousCharacter : last_character_; | |
3957 int i = 0; | 4091 int i = 0; |
3958 // Do a fast loop where there is no exit capacity check. | 4092 // Do a fast loop where there is no exit capacity check. |
3959 while (true) { | 4093 while (true) { |
3960 int fast_length; | 4094 int fast_length; |
3961 if (capacity_ == -1) { | 4095 if (skip_capacity_check_) { |
3962 fast_length = length; | 4096 fast_length = length; |
3963 } else { | 4097 } else { |
3964 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); | 4098 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); |
3965 // Need enough space to write everything but one character. | 4099 // Need enough space to write everything but one character. |
3966 STATIC_ASSERT(Utf16::kMaxExtraUtf8BytesForOneUtf16CodeUnit == 3); | 4100 STATIC_ASSERT(Utf16::kMaxExtraUtf8BytesForOneUtf16CodeUnit == 3); |
3967 int writable_length = (remaining_capacity - 3)/3; | 4101 int max_size_per_char = sizeof(Char) == 1 ? 2 : 3; |
4102 int writable_length = | |
4103 (remaining_capacity - max_size_per_char)/max_size_per_char; | |
3968 // Need to drop into slow loop. | 4104 // Need to drop into slow loop. |
3969 if (writable_length <= 0) break; | 4105 if (writable_length <= 0) break; |
3970 fast_length = i + writable_length; | 4106 fast_length = i + writable_length; |
3971 if (fast_length > length) fast_length = length; | 4107 if (fast_length > length) fast_length = length; |
3972 } | 4108 } |
3973 // Write the characters to the stream. | 4109 // Write the characters to the stream. |
3974 for (; i < fast_length; i++) { | 4110 if (sizeof(Char) == 1) { |
3975 uint16_t character = *chars++; | 4111 for (; i < fast_length; i++) { |
3976 buffer += Utf8::Encode(buffer, character, last_character); | 4112 buffer += |
3977 last_character = character; | 4113 Utf8::Encode(buffer, *chars++, Utf16::kNoPreviousCharacter); |
3978 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); | 4114 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); |
4115 } | |
4116 } else { | |
4117 for (; i < fast_length; i++) { | |
4118 uint16_t character = *chars++; | |
4119 buffer += Utf8::Encode(buffer, character, last_character); | |
4120 last_character = character; | |
4121 ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); | |
4122 } | |
3979 } | 4123 } |
3980 // Array is fully written. Exit. | 4124 // Array is fully written. Exit. |
3981 if (fast_length == length) { | 4125 if (fast_length == length) { |
3982 // Write state back out to object. | 4126 // Write state back out to object. |
3983 last_character_ = last_character; | 4127 last_character_ = last_character; |
3984 buffer_ = buffer; | 4128 buffer_ = buffer; |
3985 utf16_chars_read_ += i; | 4129 utf16_chars_read_ += length; |
3986 return; | 4130 return; |
3987 } | 4131 } |
3988 } | 4132 } |
3989 ASSERT(capacity_ != -1); | 4133 ASSERT(!skip_capacity_check_); |
3990 // Slow loop. Must check capacity on each iteration. | 4134 // Slow loop. Must check capacity on each iteration. |
3991 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); | 4135 int remaining_capacity = capacity_ - static_cast<int>(buffer - start_); |
3992 ASSERT(remaining_capacity >= 0); | 4136 ASSERT(remaining_capacity >= 0); |
3993 for (; i < length && remaining_capacity > 0; i++) { | 4137 for (; i < length && remaining_capacity > 0; i++) { |
3994 uint16_t character = *chars++; | 4138 uint16_t character = *chars++; |
3995 int written = WriteEndCharacter(character, | 4139 int written = WriteEndCharacter(character, |
3996 last_character, | 4140 last_character, |
3997 remaining_capacity, | 4141 remaining_capacity, |
3998 buffer); | 4142 buffer); |
3999 if (written == 0) { | 4143 if (written == 0) { |
4000 early_termination_ = true; | 4144 early_termination_ = true; |
4001 break; | 4145 break; |
4002 } | 4146 } |
4003 buffer += written; | 4147 buffer += written; |
4004 remaining_capacity -= written; | 4148 remaining_capacity -= written; |
4005 last_character = character; | 4149 last_character = character; |
4006 } | 4150 } |
4007 // Write state back out to object. | 4151 // Write state back out to object. |
4008 last_character_ = last_character; | 4152 last_character_ = last_character; |
4009 buffer_ = buffer; | 4153 buffer_ = buffer; |
4010 utf16_chars_read_ += i; | 4154 utf16_chars_read_ += i; |
4011 } | 4155 } |
4012 | 4156 |
4013 inline bool IsDone() { | 4157 inline bool IsDone() { |
4014 return early_termination_; | 4158 return early_termination_; |
4015 } | 4159 } |
4016 | 4160 |
4017 inline void VisitOneByteString(const uint8_t* chars, unsigned length) { | 4161 inline void VisitOneByteString(const uint8_t* chars, int length) { |
4018 Visit(chars, static_cast<int>(length)); | 4162 Visit(chars, length); |
4019 } | 4163 } |
4020 | 4164 |
4021 inline void VisitTwoByteString(const uint16_t* chars, unsigned length) { | 4165 inline void VisitTwoByteString(const uint16_t* chars, int length) { |
4022 Visit(chars, static_cast<int>(length)); | 4166 Visit(chars, length); |
4023 } | 4167 } |
4024 | 4168 |
4025 inline int CompleteWrite(bool write_null, int* utf16_chars_read_out) { | 4169 int CompleteWrite(bool write_null, int* utf16_chars_read_out) { |
4026 // Write out number of utf16 characters written to the stream. | 4170 // Write out number of utf16 characters written to the stream. |
4027 if (utf16_chars_read_out != NULL) { | 4171 if (utf16_chars_read_out != NULL) { |
4028 *utf16_chars_read_out = utf16_chars_read_; | 4172 *utf16_chars_read_out = utf16_chars_read_; |
4029 } | 4173 } |
4030 // Only null terminate if all of the string was written and there's space. | 4174 // Only null terminate if all of the string was written and there's space. |
4031 if (write_null && | 4175 if (write_null && |
4032 !early_termination_ && | 4176 !early_termination_ && |
4033 (capacity_ == -1 || (buffer_ - start_) < capacity_)) { | 4177 (capacity_ == -1 || (buffer_ - start_) < capacity_)) { |
4034 *buffer_++ = '\0'; | 4178 *buffer_++ = '\0'; |
4035 } | 4179 } |
4036 return static_cast<int>(buffer_ - start_); | 4180 return static_cast<int>(buffer_ - start_); |
4037 } | 4181 } |
4038 | 4182 |
4039 private: | 4183 private: |
4040 bool early_termination_; | 4184 bool early_termination_; |
4041 int last_character_; | 4185 int last_character_; |
4042 char* buffer_; | 4186 char* buffer_; |
4043 char* const start_; | 4187 char* const start_; |
4044 int capacity_; | 4188 int capacity_; |
4189 bool const skip_capacity_check_; | |
4045 int utf16_chars_read_; | 4190 int utf16_chars_read_; |
4046 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor); | 4191 DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8WriterVisitor); |
4047 }; | 4192 }; |
4048 | 4193 |
4049 | 4194 |
4195 static bool RecursivelySerializeToUtf8(i::String* current, | |
4196 Utf8WriterVisitor* writer, | |
4197 int recursion_budget) { | |
4198 while (!writer->IsDone()) { | |
4199 i::ConsString* cons_string = i::String::VisitFlat(writer, current); | |
4200 if (cons_string == NULL) return true; // Leaf node. | |
4201 if (recursion_budget <= 0) return false; | |
4202 // Must write the left branch first. | |
4203 i::String* first = cons_string->first(); | |
4204 bool success = RecursivelySerializeToUtf8(first, | |
4205 writer, | |
4206 recursion_budget - 1); | |
4207 if (!success) return false; | |
4208 // Inline tail recurse for right branch. | |
4209 current = cons_string->second(); | |
4210 } | |
4211 return true; | |
4212 } | |
4213 | |
4214 | |
4050 int String::WriteUtf8(char* buffer, | 4215 int String::WriteUtf8(char* buffer, |
4051 int capacity, | 4216 int capacity, |
4052 int* nchars_ref, | 4217 int* nchars_ref, |
4053 int options) const { | 4218 int options) const { |
4054 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); | 4219 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); |
4055 if (IsDeadCheck(isolate, "v8::String::WriteUtf8()")) return 0; | 4220 if (IsDeadCheck(isolate, "v8::String::WriteUtf8()")) return 0; |
4056 LOG_API(isolate, "String::WriteUtf8"); | 4221 LOG_API(isolate, "String::WriteUtf8"); |
4057 ENTER_V8(isolate); | 4222 ENTER_V8(isolate); |
4058 i::Handle<i::String> str = Utils::OpenHandle(this); | 4223 i::Handle<i::String> str = Utils::OpenHandle(this); |
4059 if (options & HINT_MANY_WRITES_EXPECTED) { | 4224 if (options & HINT_MANY_WRITES_EXPECTED) { |
4060 FlattenString(str); // Flatten the string for efficiency. | 4225 FlattenString(str); // Flatten the string for efficiency. |
4061 } | 4226 } |
4062 Utf8WriterVisitor writer(buffer, capacity); | 4227 const int string_length = str->length(); |
4063 i::ConsStringIteratorOp* op = isolate->write_iterator(); | 4228 bool write_null = !(options & NO_NULL_TERMINATION); |
4064 op->Reset(); | 4229 // First check if we can just write the string without checking capacity. |
4065 int32_t type = str->map()->instance_type(); | 4230 if (capacity == -1 || capacity / 3 >= string_length) { |
4066 unsigned str_length = static_cast<unsigned>(str->length()); | 4231 Utf8WriterVisitor writer(buffer, capacity, true); |
4067 if (str_length != 0) { | 4232 const int kMaxRecursion = 100; |
4068 i::String::Visit(*str, 0, writer, *op, type, str_length); | 4233 bool success = RecursivelySerializeToUtf8(*str, &writer, kMaxRecursion); |
4069 while (!writer.IsDone()) { | 4234 if (success) return writer.CompleteWrite(write_null, nchars_ref); |
4070 unsigned length_out; | 4235 } else if (capacity >= string_length) { |
4071 i::String* next = op->ContinueOperation(&type, &length_out); | 4236 // First check that the buffer is large enough. |
4072 if (next == NULL) break; | 4237 int utf8_bytes = v8::Utf8Length(*str, str->GetIsolate()); |
4073 // TODO(dcarney): need an asserting null op. | 4238 if (utf8_bytes <= capacity) { |
4074 i::ConsStringNullOp null_op; | 4239 // ASCII fast path. |
4075 i::String::Visit(next, 0, writer, null_op, type, length_out); | 4240 if (utf8_bytes == string_length) { |
4241 WriteOneByte(reinterpret_cast<uint8_t*>(buffer), 0, capacity, options); | |
4242 if (nchars_ref != NULL) *nchars_ref = string_length; | |
4243 if (write_null && (utf8_bytes+1 <= capacity)) { | |
4244 return string_length + 1; | |
4245 } | |
4246 return string_length; | |
4247 } | |
4248 if (write_null && (utf8_bytes+1 > capacity)) { | |
4249 options |= NO_NULL_TERMINATION; | |
4250 } | |
4251 // Recurse once without a capacity limit. | |
4252 // This will get into the first branch above. | |
4253 // TODO(dcarney) Check max left rec. in Utf8Length and fall through. | |
4254 return WriteUtf8(buffer, -1, nchars_ref, options); | |
4076 } | 4255 } |
4077 } | 4256 } |
4078 return writer.CompleteWrite(!(options & NO_NULL_TERMINATION), nchars_ref); | 4257 // Recursive slow path can potentially be unreasonably slow. Flatten. |
4258 str = FlattenGetString(str); | |
4259 Utf8WriterVisitor writer(buffer, capacity, false); | |
4260 i::String::VisitFlat(&writer, *str); | |
4261 return writer.CompleteWrite(write_null, nchars_ref); | |
4079 } | 4262 } |
4080 | 4263 |
4081 | 4264 |
4082 int String::WriteAscii(char* buffer, | 4265 int String::WriteAscii(char* buffer, |
4083 int start, | 4266 int start, |
4084 int length, | 4267 int length, |
4085 int options) const { | 4268 int options) const { |
4086 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); | 4269 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); |
4087 if (IsDeadCheck(isolate, "v8::String::WriteAscii()")) return 0; | 4270 if (IsDeadCheck(isolate, "v8::String::WriteAscii()")) return 0; |
4088 LOG_API(isolate, "String::WriteAscii"); | 4271 LOG_API(isolate, "String::WriteAscii"); |
(...skipping 2635 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6724 | 6907 |
6725 v->VisitPointers(blocks_.first(), first_block_limit_); | 6908 v->VisitPointers(blocks_.first(), first_block_limit_); |
6726 | 6909 |
6727 for (int i = 1; i < blocks_.length(); i++) { | 6910 for (int i = 1; i < blocks_.length(); i++) { |
6728 v->VisitPointers(blocks_[i], &blocks_[i][kHandleBlockSize]); | 6911 v->VisitPointers(blocks_[i], &blocks_[i][kHandleBlockSize]); |
6729 } | 6912 } |
6730 } | 6913 } |
6731 | 6914 |
6732 | 6915 |
6733 } } // namespace v8::internal | 6916 } } // namespace v8::internal |
OLD | NEW |