Chromium Code Reviews| Index: src/objects.cc |
| diff --git a/src/objects.cc b/src/objects.cc |
| index cea724fa6f29293e2878d9598731797e26e97f70..843574a96e2f1656e26853d09756458ee39e72e4 100644 |
| --- a/src/objects.cc |
| +++ b/src/objects.cc |
| @@ -7642,14 +7642,20 @@ bool String::MarkAsUndetectable() { |
| bool String::IsEqualTo(Vector<const char> str) { |
| - Isolate* isolate = GetIsolate(); |
| int slen = length(); |
| - Access<UnicodeCache::Utf8Decoder> |
| - decoder(isolate->unicode_cache()->utf8_decoder()); |
| - decoder->Reset(str.start(), str.length()); |
| + // Can't check exact length equality, but we can check bounds. |
| + int str_len = str.length(); |
| + if (str_len < slen || |
| + str_len > slen*static_cast<int>(unibrow::Utf8::kMaxEncodedSize)) { |
| + return false; |
| + } |
| int i; |
| - for (i = 0; i < slen && decoder->has_more(); i++) { |
| - uint32_t r = decoder->GetNext(); |
| + unsigned remaining_in_str = static_cast<unsigned>(str_len); |
| + const uint8_t* utf8_data = reinterpret_cast<const uint8_t*>(str.start()); |
| + for (i = 0; i < slen && remaining_in_str > 0; i++) { |
| + unsigned cursor = 0; |
| + uint32_t r = unibrow::Utf8::ValueOf(utf8_data, remaining_in_str, &cursor); |
| + ASSERT(cursor > 0 && cursor <= remaining_in_str); |
| if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| if (i > slen - 1) return false; |
| if (Get(i++) != unibrow::Utf16::LeadSurrogate(r)) return false; |
| @@ -7657,8 +7663,10 @@ bool String::IsEqualTo(Vector<const char> str) { |
| } else { |
| if (Get(i) != r) return false; |
| } |
| + utf8_data += cursor; |
| + remaining_in_str -= cursor; |
| } |
| - return i == slen && !decoder->has_more(); |
| + return i == slen && remaining_in_str == 0; |
| } |
| @@ -7863,46 +7871,51 @@ uint32_t StringHasher::GetHashField() { |
| } |
| -uint32_t StringHasher::ComputeHashField(unibrow::CharacterStream* buffer, |
| - int length, |
| - uint32_t seed) { |
| - typedef unibrow::Utf16 u; |
| - StringHasher hasher(length, seed); |
| - // Very long strings have a trivial hash that doesn't inspect the |
| - // string contents. |
| - if (hasher.has_trivial_hash()) { |
| - return hasher.GetHashField(); |
| - } |
| - // Do the iterative array index computation as long as there is a |
| - // chance this is an array index. |
| - if (hasher.is_array_index_) { |
| - while (buffer->has_more()) { |
| - uint32_t c = buffer->GetNext(); |
| - if (c > u::kMaxNonSurrogateCharCode) { |
| - uint16_t c1 = u::LeadSurrogate(c); |
| - uint16_t c2 = u::TrailSurrogate(c); |
| - hasher.AddCharacter(c1); |
| - hasher.AddCharacter(c2); |
| - if (!hasher.UpdateIndex(c1)) break; |
| - if (!hasher.UpdateIndex(c2)) break; |
| - } else { |
| - hasher.AddCharacter(c); |
| - if (!hasher.UpdateIndex(c)) break; |
| - } |
| - } |
| - } |
| - // Process the remaining characters without updating the array |
| - // index. |
| - while (buffer->has_more()) { |
| - ASSERT(!hasher.is_array_index_); |
| - uint32_t c = buffer->GetNext(); |
| - if (c > u::kMaxNonSurrogateCharCode) { |
| - hasher.AddCharacter(u::LeadSurrogate(c)); |
| - hasher.AddCharacter(u::TrailSurrogate(c)); |
| +uint32_t StringHasher::ComputeUtf8Hash(Vector<const char> chars, |
| + uint32_t seed, |
| + int* utf16_length_out) { |
| + int vector_length = chars.length(); |
| + // Handle some edge cases |
| + if (vector_length <= 1) { |
| + ASSERT(vector_length == 0 || |
| + static_cast<uint8_t>(chars.start()[0]) <= |
| + unibrow::Utf8::kMaxOneByteChar); |
| + *utf16_length_out = vector_length; |
| + return HashSequentialString(chars.start(), vector_length, seed); |
| + } |
| + // Start with a fake length which won't affect computation. |
| + // It will be updated later. |
| + StringHasher hasher(String::kMaxArrayIndexSize, seed); |
| + unsigned remaining = static_cast<unsigned>(vector_length); |
| + const uint8_t* stream = reinterpret_cast<const uint8_t*>(chars.start()); |
| + int utf16_length = 0; |
| + bool is_index = true; |
| + ASSERT(hasher.is_array_index_); |
| + while (remaining > 0) { |
| + unsigned consumed = 0; |
| + uint32_t c = unibrow::Utf8::ValueOf(stream, remaining, &consumed); |
| + ASSERT(consumed > 0 && consumed <= remaining); |
| + stream += consumed; |
| + remaining -= consumed; |
| + bool is_two_byte = c > unibrow::Utf16::kMaxNonSurrogateCharCode; |
| + utf16_length += is_two_byte ? 2 : 1; |
|
Yang
2012/12/20 09:20:27
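is_two_byte is a misnomer? It is true when c exceeds kMaxNonSurrogateCharCode, i.e. when the character needs a surrogate pair (two UTF-16 code units), not when it occupies two bytes.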
|
| + // No need to keep hashing. But we do need to calculate utf16_length. |
| + if (utf16_length > String::kMaxHashCalcLength) continue; |
| + if (is_two_byte) { |
| + uint16_t c1 = unibrow::Utf16::LeadSurrogate(c); |
| + uint16_t c2 = unibrow::Utf16::TrailSurrogate(c); |
| + hasher.AddCharacter(c1); |
| + hasher.AddCharacter(c2); |
| + if (is_index) is_index = hasher.UpdateIndex(c1); |
| + if (is_index) is_index = hasher.UpdateIndex(c2); |
| } else { |
| hasher.AddCharacter(c); |
| + if (is_index) is_index = hasher.UpdateIndex(c); |
| } |
| } |
| + *utf16_length_out = static_cast<int>(utf16_length); |
| + // Must set length here so that hash computation is correct. |
| + hasher.length_ = utf16_length; |
| return hasher.GetHashField(); |
| } |
| @@ -11717,10 +11730,7 @@ class Utf8SymbolKey : public HashTableKey { |
| uint32_t Hash() { |
| if (hash_field_ != 0) return hash_field_ >> String::kHashShift; |
| - unibrow::Utf8InputBuffer<> buffer(string_.start(), |
| - static_cast<unsigned>(string_.length())); |
| - chars_ = buffer.Utf16Length(); |
| - hash_field_ = StringHasher::ComputeHashField(&buffer, chars_, seed_); |
| + hash_field_ = StringHasher::ComputeUtf8Hash(string_, seed_, &chars_); |
| uint32_t result = hash_field_ >> String::kHashShift; |
| ASSERT(result != 0); // Ensure that the hash value of 0 is never computed. |
| return result; |