Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 6025 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6036 | 6036 |
| 6037 // Negative length means to the end of the string. | 6037 // Negative length means to the end of the string. |
| 6038 if (length < 0) length = kMaxInt - offset; | 6038 if (length < 0) length = kMaxInt - offset; |
| 6039 | 6039 |
| 6040 // Compute the size of the UTF-8 string. Start at the specified offset. | 6040 // Compute the size of the UTF-8 string. Start at the specified offset. |
| 6041 Access<StringInputBuffer> buffer( | 6041 Access<StringInputBuffer> buffer( |
| 6042 heap->isolate()->objects_string_input_buffer()); | 6042 heap->isolate()->objects_string_input_buffer()); |
| 6043 buffer->Reset(offset, this); | 6043 buffer->Reset(offset, this); |
| 6044 int character_position = offset; | 6044 int character_position = offset; |
| 6045 int utf8_bytes = 0; | 6045 int utf8_bytes = 0; |
| 6046 int last = unibrow::Utf8::kNoPreviousCharacter; | |
| 6046 while (buffer->has_more() && character_position++ < offset + length) { | 6047 while (buffer->has_more() && character_position++ < offset + length) { |
| 6047 uint16_t character = buffer->GetNext(); | 6048 uint16_t character = buffer->GetNext(); |
| 6048 utf8_bytes += unibrow::Utf8::Length(character); | 6049 utf8_bytes += unibrow::Utf8::Length(character, last); |
| 6050 last = character; | |
| 6049 } | 6051 } |
| 6050 | 6052 |
| 6051 if (length_return) { | 6053 if (length_return) { |
| 6052 *length_return = utf8_bytes; | 6054 *length_return = utf8_bytes; |
| 6053 } | 6055 } |
| 6054 | 6056 |
| 6055 char* result = NewArray<char>(utf8_bytes + 1); | 6057 char* result = NewArray<char>(utf8_bytes + 1); |
| 6056 | 6058 |
| 6057 // Convert the UTF-16 string to a UTF-8 buffer. Start at the specified offset. | 6059 // Convert the UTF-16 string to a UTF-8 buffer. Start at the specified offset. |
| 6058 buffer->Rewind(); | 6060 buffer->Rewind(); |
| 6059 buffer->Seek(offset); | 6061 buffer->Seek(offset); |
| 6060 character_position = offset; | 6062 character_position = offset; |
| 6061 int utf8_byte_position = 0; | 6063 int utf8_byte_position = 0; |
| 6064 last = unibrow::Utf8::kNoPreviousCharacter; | |
| 6062 while (buffer->has_more() && character_position++ < offset + length) { | 6065 while (buffer->has_more() && character_position++ < offset + length) { |
| 6063 uint16_t character = buffer->GetNext(); | 6066 uint16_t character = buffer->GetNext(); |
| 6064 if (allow_nulls == DISALLOW_NULLS && character == 0) { | 6067 if (allow_nulls == DISALLOW_NULLS && character == 0) { |
| 6065 character = ' '; | 6068 character = ' '; |
| 6066 } | 6069 } |
| 6067 utf8_byte_position += | 6070 utf8_byte_position += |
| 6068 unibrow::Utf8::Encode(result + utf8_byte_position, character); | 6071 unibrow::Utf8::Encode(result + utf8_byte_position, character, last); |
| 6072 last = character; | |
| 6069 } | 6073 } |
| 6070 result[utf8_byte_position] = 0; | 6074 result[utf8_byte_position] = 0; |
| 6071 return SmartArrayPointer<char>(result); | 6075 return SmartArrayPointer<char>(result); |
| 6072 } | 6076 } |
| 6073 | 6077 |
| 6074 | 6078 |
| 6075 SmartArrayPointer<char> String::ToCString(AllowNullsFlag allow_nulls, | 6079 SmartArrayPointer<char> String::ToCString(AllowNullsFlag allow_nulls, |
| 6076 RobustnessFlag robust_flag, | 6080 RobustnessFlag robust_flag, |
| 6077 int* length_return) { | 6081 int* length_return) { |
| 6078 return ToCString(allow_nulls, robust_flag, 0, -1, length_return); | 6082 return ToCString(allow_nulls, robust_flag, 0, -1, length_return); |
| (...skipping 295 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6374 break; | 6378 break; |
| 6375 } | 6379 } |
| 6376 | 6380 |
| 6377 UNREACHABLE(); | 6381 UNREACHABLE(); |
| 6378 return 0; | 6382 return 0; |
| 6379 } | 6383 } |
| 6380 | 6384 |
| 6381 | 6385 |
| 6382 // This method determines the type of string involved and then gets the UTF8 | 6386 // This method determines the type of string involved and then gets the UTF8 |
| 6383 // length of the string. It doesn't flatten the string and has log(n) recursion | 6387 // length of the string. It doesn't flatten the string and has log(n) recursion |
| 6384 // for a string of length n. | 6388 // for a string of length n. If the failure flag gets set, then we have to |
| 6385 int String::Utf8Length(String* input, int from, int to) { | 6389 // flatten the string and retry. Failures are caused by surrogate pairs in deep |
|
rossberg
2012/03/07 13:32:47
Not sure I understand the failure mode. Why does t
Erik Corry
2012/03/11 19:29:22
The old version of this function was constructed a
| |
| 6390 // cons strings. | |
| 6391 int String::Utf8Length(String* input, | |
| 6392 int from, | |
| 6393 int to, | |
| 6394 bool followed_by_surrogate, | |
| 6395 int max_recursion, | |
| 6396 bool* failure, | |
| 6397 bool* starts_with_surrogate) { | |
| 6386 if (from == to) return 0; | 6398 if (from == to) return 0; |
| 6387 int total = 0; | 6399 int total = 0; |
| 6400 bool dummy; | |
| 6388 while (true) { | 6401 while (true) { |
| 6389 if (input->IsAsciiRepresentation()) return total + to - from; | 6402 if (input->IsAsciiRepresentation()) { |
| 6403 *starts_with_surrogate = false; | |
| 6404 return total + to - from; | |
| 6405 } | |
| 6390 switch (StringShape(input).representation_tag()) { | 6406 switch (StringShape(input).representation_tag()) { |
| 6391 case kConsStringTag: { | 6407 case kConsStringTag: { |
| 6392 ConsString* str = ConsString::cast(input); | 6408 ConsString* str = ConsString::cast(input); |
| 6393 String* first = str->first(); | 6409 String* first = str->first(); |
| 6394 String* second = str->second(); | 6410 String* second = str->second(); |
| 6395 int first_length = first->length(); | 6411 int first_length = first->length(); |
| 6396 if (first_length - from < to - first_length) { | 6412 if (first_length - from > to - first_length) { |
| 6397 if (first_length > from) { | 6413 if (first_length < to) { |
| 6398 // Left hand side is shorter. | |
| 6399 total += Utf8Length(first, from, first_length); | |
| 6400 input = second; | |
| 6401 from = 0; | |
| 6402 to -= first_length; | |
| 6403 } else { | |
| 6404 // We only need the right hand side. | |
| 6405 input = second; | |
| 6406 from -= first_length; | |
| 6407 to -= first_length; | |
| 6408 } | |
| 6409 } else { | |
| 6410 if (first_length <= to) { | |
| 6411 // Right hand side is shorter. | 6414 // Right hand side is shorter. |
| 6412 total += Utf8Length(second, 0, to - first_length); | 6415 bool right_starts_with_surrogate = false; |
| 6416 total += Utf8Length(second, | |
| 6417 0, | |
| 6418 to - first_length, | |
| 6419 followed_by_surrogate, | |
| 6420 max_recursion - 1, | |
| 6421 failure, | |
| 6422 &right_starts_with_surrogate); | |
| 6423 if (*failure) return 0; | |
| 6424 followed_by_surrogate = right_starts_with_surrogate; | |
| 6413 input = first; | 6425 input = first; |
| 6414 to = first_length; | 6426 to = first_length; |
| 6415 } else { | 6427 } else { |
| 6416 // We only need the left hand side. | 6428 // We only need the left hand side. |
| 6417 input = first; | 6429 input = first; |
| 6418 } | 6430 } |
| 6431 } else { | |
| 6432 if (first_length > from) { | |
| 6433 // Left hand side is shorter. | |
| 6434 if (first->IsAsciiRepresentation()) { | |
| 6435 total += first_length - from; | |
| 6436 *starts_with_surrogate = false; | |
| 6437 starts_with_surrogate = &dummy; | |
| 6438 input = second; | |
| 6439 from = 0; | |
| 6440 to -= first_length; | |
| 6441 } else if (second->IsAsciiRepresentation()) { | |
| 6442 followed_by_surrogate = false; | |
| 6443 total += to - first_length; | |
| 6444 input = first; | |
| 6445 to = first_length; | |
| 6446 } else if (max_recursion > 0) { | |
|
rossberg
2012/03/07 13:32:47
Why is this the only recursive path actually check
Erik Corry
2012/03/11 19:29:22
See above.
| |
| 6447 bool right_starts_with_surrogate = false; | |
| 6448 // Recursing on the long one. This may fail. | |
| 6449 total += Utf8Length(second, | |
| 6450 0, | |
| 6451 to - first_length, | |
| 6452 followed_by_surrogate, | |
| 6453 max_recursion - 1, | |
| 6454 failure, | |
| 6455 &right_starts_with_surrogate); | |
| 6456 if (*failure) return 0; | |
| 6457 input = first; | |
| 6458 to = first_length; | |
| 6459 followed_by_surrogate = right_starts_with_surrogate; | |
| 6460 } else { | |
| 6461 *failure = true; | |
| 6462 return 0; | |
| 6463 } | |
| 6464 } else { | |
| 6465 // We only need the right hand side. | |
| 6466 input = second; | |
| 6467 from = 0; | |
| 6468 to -= first_length; | |
| 6469 } | |
| 6419 } | 6470 } |
| 6420 continue; | 6471 continue; |
| 6421 } | 6472 } |
| 6422 case kExternalStringTag: | 6473 case kExternalStringTag: |
| 6423 case kSeqStringTag: { | 6474 case kSeqStringTag: { |
| 6424 Vector<const uc16> vector = input->GetFlatContent().ToUC16Vector(); | 6475 Vector<const uc16> vector = input->GetFlatContent().ToUC16Vector(); |
| 6425 const uc16* p = vector.start(); | 6476 const uc16* p = vector.start(); |
| 6477 int previous = unibrow::Utf8::kNoPreviousCharacter; | |
| 6426 for (int i = from; i < to; i++) { | 6478 for (int i = from; i < to; i++) { |
| 6427 total += unibrow::Utf8::Length(p[i]); | 6479 uc16 c = p[i]; |
| 6480 total += unibrow::Utf8::Length(c, previous); | |
| 6481 previous = c; | |
| 6482 } | |
| 6483 if (to - from > 0) { | |
| 6484 if (unibrow::Utf16::IsLeadSurrogate(previous) && | |
| 6485 followed_by_surrogate) { | |
| 6486 total -= 2; | |
|
rossberg
2012/03/07 13:32:47
I wouldn't mind a comment here, why -2? Is that kS
Erik Corry
2012/03/11 19:29:22
No, it's kBytesSavedByCombiningSurrogates. Fixed.
| |
| 6487 } | |
| 6488 if (unibrow::Utf16::IsTrailSurrogate(p[from])) { | |
| 6489 *starts_with_surrogate = true; | |
| 6490 } | |
| 6428 } | 6491 } |
| 6429 return total; | 6492 return total; |
| 6430 } | 6493 } |
| 6431 case kSlicedStringTag: { | 6494 case kSlicedStringTag: { |
| 6432 SlicedString* str = SlicedString::cast(input); | 6495 SlicedString* str = SlicedString::cast(input); |
| 6433 int offset = str->offset(); | 6496 int offset = str->offset(); |
| 6434 input = str->parent(); | 6497 input = str->parent(); |
| 6435 from += offset; | 6498 from += offset; |
| 6436 to += offset; | 6499 to += offset; |
| 6437 continue; | 6500 continue; |
| (...skipping 394 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6832 } | 6895 } |
| 6833 } | 6896 } |
| 6834 } | 6897 } |
| 6835 | 6898 |
| 6836 | 6899 |
| 6837 template <typename IteratorA, typename IteratorB> | 6900 template <typename IteratorA, typename IteratorB> |
| 6838 static inline bool CompareStringContents(IteratorA* ia, IteratorB* ib) { | 6901 static inline bool CompareStringContents(IteratorA* ia, IteratorB* ib) { |
| 6839 // General slow case check. We know that the ia and ib iterators | 6902 // General slow case check. We know that the ia and ib iterators |
| 6840 // have the same length. | 6903 // have the same length. |
| 6841 while (ia->has_more()) { | 6904 while (ia->has_more()) { |
| 6842 uc32 ca = ia->GetNext(); | 6905 uint32_t ca = ia->GetNext(); |
| 6843 uc32 cb = ib->GetNext(); | 6906 uint32_t cb = ib->GetNext(); |
| 6907 ASSERT(ca <= unibrow::Utf16::kMaxNonSurrogateCharCode); | |
| 6908 ASSERT(cb <= unibrow::Utf16::kMaxNonSurrogateCharCode); | |
| 6844 if (ca != cb) | 6909 if (ca != cb) |
| 6845 return false; | 6910 return false; |
| 6846 } | 6911 } |
| 6847 return true; | 6912 return true; |
| 6848 } | 6913 } |
| 6849 | 6914 |
| 6850 | 6915 |
| 6851 // Compares the contents of two strings by reading and comparing | 6916 // Compares the contents of two strings by reading and comparing |
| 6852 // int-sized blocks of characters. | 6917 // int-sized blocks of characters. |
| 6853 template <typename Char> | 6918 template <typename Char> |
| (...skipping 162 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 7016 | 7081 |
| 7017 | 7082 |
| 7018 bool String::IsEqualTo(Vector<const char> str) { | 7083 bool String::IsEqualTo(Vector<const char> str) { |
| 7019 Isolate* isolate = GetIsolate(); | 7084 Isolate* isolate = GetIsolate(); |
| 7020 int slen = length(); | 7085 int slen = length(); |
| 7021 Access<UnicodeCache::Utf8Decoder> | 7086 Access<UnicodeCache::Utf8Decoder> |
| 7022 decoder(isolate->unicode_cache()->utf8_decoder()); | 7087 decoder(isolate->unicode_cache()->utf8_decoder()); |
| 7023 decoder->Reset(str.start(), str.length()); | 7088 decoder->Reset(str.start(), str.length()); |
| 7024 int i; | 7089 int i; |
| 7025 for (i = 0; i < slen && decoder->has_more(); i++) { | 7090 for (i = 0; i < slen && decoder->has_more(); i++) { |
| 7026 uc32 r = decoder->GetNext(); | 7091 uint32_t r = decoder->GetNext(); |
| 7027 if (Get(i) != r) return false; | 7092 if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| 7093 if (i > slen - 1) return false; | |
| 7094 if (Get(i++) != unibrow::Utf16::LeadSurrogate(r)) return false; | |
| 7095 if (Get(i) != unibrow::Utf16::TrailSurrogate(r)) return false; | |
| 7096 } else { | |
| 7097 if (Get(i) != r) return false; | |
| 7098 } | |
| 7028 } | 7099 } |
| 7029 return i == slen && !decoder->has_more(); | 7100 return i == slen && !decoder->has_more(); |
| 7030 } | 7101 } |
| 7031 | 7102 |
| 7032 | 7103 |
| 7033 bool String::IsAsciiEqualTo(Vector<const char> str) { | 7104 bool String::IsAsciiEqualTo(Vector<const char> str) { |
| 7034 int slen = length(); | 7105 int slen = length(); |
| 7035 if (str.length() != slen) return false; | 7106 if (str.length() != slen) return false; |
| 7036 FlatContent content = GetFlatContent(); | 7107 FlatContent content = GetFlatContent(); |
| 7037 if (content.IsAscii()) { | 7108 if (content.IsAscii()) { |
| (...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 7147 value <<= String::kHashShift; | 7218 value <<= String::kHashShift; |
| 7148 value |= length << String::kArrayIndexHashLengthShift; | 7219 value |= length << String::kArrayIndexHashLengthShift; |
| 7149 | 7220 |
| 7150 ASSERT((value & String::kIsNotArrayIndexMask) == 0); | 7221 ASSERT((value & String::kIsNotArrayIndexMask) == 0); |
| 7151 ASSERT((length > String::kMaxCachedArrayIndexLength) || | 7222 ASSERT((length > String::kMaxCachedArrayIndexLength) || |
| 7152 (value & String::kContainsCachedArrayIndexMask) == 0); | 7223 (value & String::kContainsCachedArrayIndexMask) == 0); |
| 7153 return value; | 7224 return value; |
| 7154 } | 7225 } |
| 7155 | 7226 |
| 7156 | 7227 |
| 7228 void StringHasher::AddSurrogatePair(uc32 c) { | |
| 7229 uint16_t lead = unibrow::Utf16::LeadSurrogate(c); | |
| 7230 AddCharacter(lead); | |
| 7231 uint16_t trail = unibrow::Utf16::TrailSurrogate(c); | |
| 7232 AddCharacter(trail); | |
| 7233 } | |
| 7234 | |
| 7235 | |
| 7236 void StringHasher::AddSurrogatePairNoIndex(uc32 c) { | |
| 7237 uint16_t lead = unibrow::Utf16::LeadSurrogate(c); | |
| 7238 AddCharacterNoIndex(lead); | |
| 7239 uint16_t trail = unibrow::Utf16::TrailSurrogate(c); | |
| 7240 AddCharacterNoIndex(trail); | |
| 7241 } | |
| 7242 | |
| 7243 | |
| 7157 uint32_t StringHasher::GetHashField() { | 7244 uint32_t StringHasher::GetHashField() { |
| 7158 ASSERT(is_valid()); | 7245 ASSERT(is_valid()); |
| 7159 if (length_ <= String::kMaxHashCalcLength) { | 7246 if (length_ <= String::kMaxHashCalcLength) { |
| 7160 if (is_array_index()) { | 7247 if (is_array_index()) { |
| 7161 return MakeArrayIndexHash(array_index(), length_); | 7248 return MakeArrayIndexHash(array_index(), length_); |
| 7162 } | 7249 } |
| 7163 return (GetHash() << String::kHashShift) | String::kIsNotArrayIndexMask; | 7250 return (GetHash() << String::kHashShift) | String::kIsNotArrayIndexMask; |
| 7164 } else { | 7251 } else { |
| 7165 return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask; | 7252 return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask; |
| 7166 } | 7253 } |
| (...skipping 3572 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 10739 : string_(string), hash_field_(0), seed_(seed) { } | 10826 : string_(string), hash_field_(0), seed_(seed) { } |
| 10740 | 10827 |
| 10741 bool IsMatch(Object* string) { | 10828 bool IsMatch(Object* string) { |
| 10742 return String::cast(string)->IsEqualTo(string_); | 10829 return String::cast(string)->IsEqualTo(string_); |
| 10743 } | 10830 } |
| 10744 | 10831 |
| 10745 uint32_t Hash() { | 10832 uint32_t Hash() { |
| 10746 if (hash_field_ != 0) return hash_field_ >> String::kHashShift; | 10833 if (hash_field_ != 0) return hash_field_ >> String::kHashShift; |
| 10747 unibrow::Utf8InputBuffer<> buffer(string_.start(), | 10834 unibrow::Utf8InputBuffer<> buffer(string_.start(), |
| 10748 static_cast<unsigned>(string_.length())); | 10835 static_cast<unsigned>(string_.length())); |
| 10749 chars_ = buffer.Length(); | 10836 chars_ = buffer.Utf16Length(); |
| 10750 hash_field_ = String::ComputeHashField(&buffer, chars_, seed_); | 10837 hash_field_ = String::ComputeHashField(&buffer, chars_, seed_); |
| 10751 uint32_t result = hash_field_ >> String::kHashShift; | 10838 uint32_t result = hash_field_ >> String::kHashShift; |
| 10752 ASSERT(result != 0); // Ensure that the hash value of 0 is never computed. | 10839 ASSERT(result != 0); // Ensure that the hash value of 0 is never computed. |
| 10753 return result; | 10840 return result; |
| 10754 } | 10841 } |
| 10755 | 10842 |
| 10756 uint32_t HashForObject(Object* other) { | 10843 uint32_t HashForObject(Object* other) { |
| 10757 return String::cast(other)->Hash(); | 10844 return String::cast(other)->Hash(); |
| 10758 } | 10845 } |
| 10759 | 10846 |
| (...skipping 2197 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 12957 if (break_point_objects()->IsUndefined()) return 0; | 13044 if (break_point_objects()->IsUndefined()) return 0; |
| 12958 // Single break point. | 13045 // Single break point. |
| 12959 if (!break_point_objects()->IsFixedArray()) return 1; | 13046 if (!break_point_objects()->IsFixedArray()) return 1; |
| 12960 // Multiple break points. | 13047 // Multiple break points. |
| 12961 return FixedArray::cast(break_point_objects())->length(); | 13048 return FixedArray::cast(break_point_objects())->length(); |
| 12962 } | 13049 } |
| 12963 #endif // ENABLE_DEBUGGER_SUPPORT | 13050 #endif // ENABLE_DEBUGGER_SUPPORT |
| 12964 | 13051 |
| 12965 | 13052 |
| 12966 } } // namespace v8::internal | 13053 } } // namespace v8::internal |
| OLD | NEW |