Chromium Code Reviews| Index: runtime/vm/symbols.cc |
| =================================================================== |
| --- runtime/vm/symbols.cc (revision 38342) |
| +++ runtime/vm/symbols.cc (working copy) |
| @@ -6,6 +6,7 @@ |
| #include "vm/handles.h" |
| #include "vm/handles_impl.h" |
| +#include "vm/hash_table.h" |
| #include "vm/isolate.h" |
| #include "vm/object.h" |
| #include "vm/object_store.h" |
| @@ -35,9 +36,6 @@ |
| #undef DEFINE_KEYWORD_SYMBOL_INDEX |
| }; |
| -intptr_t Symbols::num_of_grows_; |
| -intptr_t Symbols::collision_count_[kMaxCollisionBuckets]; |
| - |
| DEFINE_FLAG(bool, dump_symbol_stats, false, "Dump symbol table statistics"); |
| @@ -61,30 +59,17 @@ |
| // Should only be run by the vm isolate. |
| ASSERT(isolate == Dart::vm_isolate()); |
| - if (FLAG_dump_symbol_stats) { |
| - num_of_grows_ = 0; |
| - for (intptr_t i = 0; i < kMaxCollisionBuckets; i++) { |
| - collision_count_[i] = 0; |
| - } |
| - } |
| - |
| // Create and setup a symbol table in the vm isolate. |
| SetupSymbolTable(isolate); |
| // Create all predefined symbols. |
| ASSERT((sizeof(names) / sizeof(const char*)) == Symbols::kNullCharId); |
| - ObjectStore* object_store = isolate->object_store(); |
| - Array& symbol_table = Array::Handle(); |
| - |
| // First set up all the predefined string symbols. |
| for (intptr_t i = 1; i < Symbols::kKwTableStart; i++) { |
| - // The symbol_table needs to be reloaded as it might have grown in the |
| - // previous iteration. |
| - symbol_table = object_store->symbol_table(); |
| String* str = String::ReadOnlyHandle(); |
| *str = OneByteString::New(names[i], Heap::kOld); |
| - Add(symbol_table, *str); |
| + AddToVMIsolate(*str); |
| symbol_handles_[i] = str; |
| } |
| Object::RegisterSingletonClassNames(); |
| @@ -100,54 +85,156 @@ |
| // Add Latin1 characters as Symbols, so that Symbols::FromCharCode is fast. |
| for (intptr_t c = 0; c < kNumberOfOneCharCodeSymbols; c++) { |
| - // The symbol_table needs to be reloaded as it might have grown in the |
| - // previous iteration. |
| - symbol_table = object_store->symbol_table(); |
| intptr_t idx = (kNullCharId + c); |
| ASSERT(idx < kMaxPredefinedId); |
| ASSERT(Utf::IsLatin1(c)); |
| uint8_t ch = static_cast<uint8_t>(c); |
| String* str = String::ReadOnlyHandle(); |
| *str = OneByteString::New(&ch, 1, Heap::kOld); |
| - Add(symbol_table, *str); |
| + AddToVMIsolate(*str); |
| predefined_[c] = str->raw(); |
| symbol_handles_[idx] = str; |
| } |
| } |
| +RawString* StringFrom(const uint8_t* data, intptr_t len, Heap::Space space) { |
| + return String::FromLatin1(data, len, space); |
| +} |
| +RawString* StringFrom(const uint16_t* data, intptr_t len, Heap::Space space) { |
| + return String::FromUTF16(data, len, space); |
| +} |
| +RawString* StringFrom(const int32_t* data, intptr_t len, Heap::Space space) { |
| + return String::FromUTF32(data, len, space); |
| +} |
| + |
| + |
| +template<typename CharType> |
| +class CharArray { |
| + public: |
| + CharArray(const CharType* data, intptr_t len) |
| + : data_(data), len_(len) { |
| + hash_ = String::Hash(data, len); |
| + } |
| + RawString* ToSymbol() const { |
| + String& result = String::Handle(StringFrom(data_, len_, Heap::kOld)); |
| + result.SetCanonical(); |
| + result.SetHash(hash_); |
| + return result.raw(); |
| + } |
| + bool Equals(const String& other) const { |
| + return other.Equals(data_, len_); |
| + } |
| + intptr_t Hash() const { return hash_; } |
| + private: |
| + const CharType* data_; |
| + intptr_t len_; |
| + intptr_t hash_; |
| +}; |
| +typedef CharArray<uint8_t> Latin1Array; |
| +typedef CharArray<uint16_t> UTF16Array; |
| +typedef CharArray<int32_t> UTF32Array; |
| + |
| + |
| +class StringSlice { |
| + public: |
| + StringSlice(const String* str, intptr_t begin_index, intptr_t length) |
|
Ivan Posva
2014/07/29 21:55:10
Why do you use "const String* str" instead of a re
koda
2014/07/29 22:31:50
A remnant from when this was just a struct. Change
|
| + : str_(str), begin_index_(begin_index), len_(length) { |
| + hash_ = is_all() ? str->Hash() : String::Hash(*str, begin_index, length); |
| + } |
| + RawString* ToSymbol() const; |
| + bool Equals(const String& other) const { |
| + return other.Equals(*str_, begin_index_, len_); |
| + } |
| + intptr_t Hash() const { return hash_; } |
| + private: |
| + bool is_all() const { return begin_index_ == 0 && len_ == str_->Length(); } |
| + const String* str_; |
| + intptr_t begin_index_; |
| + intptr_t len_; |
| + intptr_t hash_; |
| +}; |
| + |
| + |
| +RawString* StringSlice::ToSymbol() const { |
| + if (is_all() && str_->IsOld()) { |
| + str_->SetCanonical(); |
| + return str_->raw(); |
| + } else { |
| + String& result = String::Handle( |
| + String::SubString(*str_, begin_index_, len_, Heap::kOld)); |
| + result.SetCanonical(); |
| + result.SetHash(hash_); |
| + return result.raw(); |
| + } |
| +} |
| + |
| + |
| +class SymbolTraits { |
| + public: |
| + static bool IsMatch(const Object& a, const Object& b) { |
| + return String::Cast(a).Equals(String::Cast(b)); |
| + } |
| + template<typename CharType> |
|
Ivan Posva
2014/07/23 12:09:26
As below I would like to discuss this in person as
koda
2014/07/29 22:31:50
Acknowledged.
|
| + static bool IsMatch(const CharArray<CharType>& array, const Object& obj) { |
| + return array.Equals(String::Cast(obj)); |
| + } |
| + static bool IsMatch(const StringSlice& slice, const Object& obj) { |
| + return slice.Equals(String::Cast(obj)); |
| + } |
| + static uword Hash(const Object& key) { |
| + return String::Cast(key).Hash(); |
| + } |
| + template<typename CharType> |
| + static uword Hash(const CharArray<CharType>& array) { |
| + return array.Hash(); |
| + } |
| + static uword Hash(const StringSlice& slice) { |
| + return slice.Hash(); |
| + } |
| + template<typename CharType> |
| + static RawObject* NewKey(const CharArray<CharType>& array) { |
| + return array.ToSymbol(); |
| + } |
| + static RawObject* NewKey(const StringSlice& slice) { |
| + return slice.ToSymbol(); |
| + } |
| +}; |
| +typedef UnorderedHashSet<SymbolTraits> SymbolTable; |
| + |
| + |
| void Symbols::SetupSymbolTable(Isolate* isolate) { |
| ASSERT(isolate != NULL); |
| // Setup the symbol table used within the String class. |
| const intptr_t initial_size = (isolate == Dart::vm_isolate()) ? |
| kInitialVMIsolateSymtabSize : kInitialSymtabSize; |
| - const Array& array = Array::Handle(Array::New(initial_size + 1, Heap::kOld)); |
| - |
| - // Last element contains the count of used slots. |
| - array.SetAt(initial_size, Smi::Handle(Smi::New(0))); |
| + Array& array = |
| + Array::Handle(HashTables::New<SymbolTable>(initial_size, Heap::kOld)); |
| isolate->object_store()->set_symbol_table(array); |
| } |
| -intptr_t Symbols::Size(Isolate* isolate) { |
| +void Symbols::GetStats(Isolate* isolate, intptr_t* size, intptr_t* capacity) { |
| ASSERT(isolate != NULL); |
| - Array& symbol_table = Array::Handle(isolate, |
| - isolate->object_store()->symbol_table()); |
| - intptr_t table_size_index = symbol_table.Length() - 1; |
| - dart::Smi& used = Smi::Handle(); |
| - used ^= symbol_table.At(table_size_index); |
| - return used.Value(); |
| + SymbolTable table(Array::Handle(isolate->object_store()->symbol_table())); |
| + *size = table.NumOccupied(); |
| + *capacity = table.NumEntries(); |
| + table.Release(); |
| } |
| -void Symbols::Add(const Array& symbol_table, const String& str) { |
| +void Symbols::AddToVMIsolate(const String& str) { |
| // Should only be run by the vm isolate. |
| ASSERT(Isolate::Current() == Dart::vm_isolate()); |
| - intptr_t hash = str.Hash(); |
| - intptr_t index = FindIndex(symbol_table, str, 0, str.Length(), hash); |
| - ASSERT(symbol_table.At(index) == String::null()); |
| - InsertIntoSymbolTable(symbol_table, str, index); |
| + Isolate* isolate = Dart::vm_isolate(); |
| + Array& array = Array::Handle(isolate->object_store()->symbol_table()); |
| + SymbolTable table(array); |
| + bool present = table.Insert(str); |
| + str.SetCanonical(); |
| + ASSERT(!present); |
| + isolate->object_store()->set_symbol_table(array); |
|
Ivan Posva
2014/07/23 12:09:26
I find this pattern really awkward. Reading just t
koda
2014/07/29 22:31:50
Acknowledged.
|
| + table.Release(); |
| } |
| @@ -179,71 +266,43 @@ |
| RawString* Symbols::FromLatin1(const uint8_t* latin1_array, intptr_t len) { |
| - return NewSymbol(latin1_array, len, String::FromLatin1); |
| + return NewSymbol(Latin1Array(latin1_array, len)); |
| } |
| RawString* Symbols::FromUTF16(const uint16_t* utf16_array, intptr_t len) { |
| - return NewSymbol(utf16_array, len, String::FromUTF16); |
| + return NewSymbol(UTF16Array(utf16_array, len)); |
| } |
| RawString* Symbols::FromUTF32(const int32_t* utf32_array, intptr_t len) { |
| - return NewSymbol(utf32_array, len, String::FromUTF32); |
| + return NewSymbol(UTF32Array(utf32_array, len)); |
| } |
| -template<typename CharacterType, typename CallbackType> |
| -RawString* Symbols::NewSymbol(const CharacterType* characters, |
| - intptr_t len, |
| - CallbackType new_string) { |
| - Isolate* isolate = Isolate::Current(); |
| - String& symbol = String::Handle(isolate, String::null()); |
| - Array& symbol_table = Array::Handle(isolate, Array::null()); |
| - |
| - // Calculate the String hash for this sequence of characters. |
| - intptr_t hash = String::Hash(characters, len); |
| - |
| - // First check if a symbol exists in the vm isolate for these characters. |
| - symbol_table = Dart::vm_isolate()->object_store()->symbol_table(); |
| - intptr_t index = FindIndex(symbol_table, characters, len, hash); |
| - symbol ^= symbol_table.At(index); |
| +template<typename StringType> |
|
Ivan Posva
2014/07/29 21:55:10
Please add a comment enumerating all possible Stri
koda
2014/07/29 22:31:49
Done.
|
| +RawString* Symbols::NewSymbol(const StringType& str) { |
| + String& symbol = String::Handle(); |
| + { |
| + Isolate* isolate = Dart::vm_isolate(); |
| + Array& array = Array::Handle(isolate->object_store()->symbol_table()); |
| + SymbolTable table(array); |
| + symbol ^= table.GetOrNull(str); |
| + table.Release(); |
| + } |
| if (symbol.IsNull()) { |
| - // Now try in the symbol table of the current isolate. |
| - symbol_table = isolate->object_store()->symbol_table(); |
| - index = FindIndex(symbol_table, characters, len, hash); |
| - // Since we leave enough room in the table to guarantee, that we find an |
| - // empty spot, index is the insertion point if symbol is null. |
| - symbol ^= symbol_table.At(index); |
| - if (symbol.IsNull()) { |
| - // Allocate new result string. |
| - symbol = (*new_string)(characters, len, Heap::kOld); |
| - symbol.SetHash(hash); // Remember the calculated hash value. |
| - InsertIntoSymbolTable(symbol_table, symbol, index); |
| - } |
| + Isolate* isolate = Isolate::Current(); |
| + Array& array = Array::Handle(isolate->object_store()->symbol_table()); |
| + SymbolTable table(array); |
| + symbol ^= table.InsertNewOrGet(str); |
| + isolate->object_store()->set_symbol_table(array); |
| + table.Release(); |
| } |
| ASSERT(symbol.IsSymbol()); |
| return symbol.raw(); |
| } |
| -template RawString* Symbols::NewSymbol(const uint8_t* characters, |
| - intptr_t len, |
| - RawString* (*new_string)(const uint8_t*, |
| - intptr_t, |
| - Heap::Space)); |
| -template RawString* Symbols::NewSymbol(const uint16_t* characters, |
| - intptr_t len, |
| - RawString* (*new_string)(const uint16_t*, |
| - intptr_t, |
| - Heap::Space)); |
| -template RawString* Symbols::NewSymbol(const int32_t* characters, |
| - intptr_t len, |
| - RawString* (*new_string)(const int32_t*, |
| - intptr_t, |
| - Heap::Space)); |
| - |
| - |
| RawString* Symbols::New(const String& str) { |
| if (str.IsSymbol()) { |
| return str.raw(); |
| @@ -253,43 +312,7 @@ |
| RawString* Symbols::New(const String& str, intptr_t begin_index, intptr_t len) { |
| - ASSERT(begin_index >= 0); |
| - ASSERT(len >= 0); |
| - ASSERT((begin_index + len) <= str.Length()); |
| - Isolate* isolate = Isolate::Current(); |
| - ASSERT(isolate != Dart::vm_isolate()); |
| - String& symbol = String::Handle(isolate, String::null()); |
| - Array& symbol_table = Array::Handle(isolate, Array::null()); |
| - |
| - // Calculate the String hash for this sequence of characters. |
| - intptr_t hash = (begin_index == 0 && len == str.Length()) ? str.Hash() : |
| - String::Hash(str, begin_index, len); |
| - |
| - // First check if a symbol exists in the vm isolate for these characters. |
| - symbol_table = Dart::vm_isolate()->object_store()->symbol_table(); |
| - intptr_t index = FindIndex(symbol_table, str, begin_index, len, hash); |
| - symbol ^= symbol_table.At(index); |
| - if (symbol.IsNull()) { |
| - // Now try in the symbol table of the current isolate. |
| - symbol_table = isolate->object_store()->symbol_table(); |
| - index = FindIndex(symbol_table, str, begin_index, len, hash); |
| - // Since we leave enough room in the table to guarantee, that we find an |
| - // empty spot, index is the insertion point if symbol is null. |
| - symbol ^= symbol_table.At(index); |
| - if (symbol.IsNull()) { |
| - if (str.IsOld() && begin_index == 0 && len == str.Length()) { |
| - // Reuse the incoming str as the symbol value. |
| - symbol = str.raw(); |
| - } else { |
| - // Allocate a copy in old space. |
| - symbol = String::SubString(str, begin_index, len, Heap::kOld); |
| - symbol.SetHash(hash); |
| - } |
| - InsertIntoSymbolTable(symbol_table, symbol, index); |
| - } |
| - } |
| - ASSERT(symbol.IsSymbol()); |
| - return symbol.raw(); |
| + return NewSymbol(StringSlice(&str, begin_index, len)); |
| } |
| @@ -303,163 +326,22 @@ |
| void Symbols::DumpStats() { |
| if (FLAG_dump_symbol_stats) { |
| - intptr_t table_size = 0; |
| - dart::Smi& used = Smi::Handle(); |
| - Array& symbol_table = Array::Handle(Array::null()); |
| - |
| + intptr_t size = -1; |
| + intptr_t capacity = -1; |
| // First dump VM symbol table stats. |
| - symbol_table = Dart::vm_isolate()->object_store()->symbol_table(); |
| - table_size = symbol_table.Length() - 1; |
| - used ^= symbol_table.At(table_size); |
| - OS::Print("VM Isolate: Number of symbols : %" Pd "\n", used.Value()); |
| - OS::Print("VM Isolate: Symbol table capacity : %" Pd "\n", table_size); |
| - |
| + GetStats(Dart::vm_isolate(), &size, &capacity); |
| + OS::Print("VM Isolate: Number of symbols : %" Pd "\n", size); |
| + OS::Print("VM Isolate: Symbol table capacity : %" Pd "\n", capacity); |
| // Now dump regular isolate symbol table stats. |
| - symbol_table = Isolate::Current()->object_store()->symbol_table(); |
| - table_size = symbol_table.Length() - 1; |
| - used ^= symbol_table.At(table_size); |
| - OS::Print("Isolate: Number of symbols : %" Pd "\n", used.Value()); |
| - OS::Print("Isolate: Symbol table capacity : %" Pd "\n", table_size); |
| - |
| - // Dump overall collision and growth counts. |
| - OS::Print("Number of symbol table grows = %" Pd "\n", num_of_grows_); |
| - OS::Print("Collision counts on add and lookup :\n"); |
| - intptr_t i = 0; |
| - for (i = 0; i < (kMaxCollisionBuckets - 1); i++) { |
| - OS::Print(" %" Pd " collisions => %" Pd "\n", i, collision_count_[i]); |
| - } |
| - OS::Print(" > %" Pd " collisions => %" Pd "\n", i, collision_count_[i]); |
| + GetStats(Isolate::Current(), &size, &capacity); |
| + OS::Print("Isolate: Number of symbols : %" Pd "\n", size); |
| + OS::Print("Isolate: Symbol table capacity : %" Pd "\n", capacity); |
| + // TODO(koda): Consider recording growth and collision stats in HashTable, |
| + // in DEBUG mode. |
| } |
| } |
| -void Symbols::GrowSymbolTable(const Array& symbol_table) { |
| - // TODO(iposva): Avoid exponential growth. |
| - num_of_grows_ += 1; |
| - intptr_t table_size = symbol_table.Length() - 1; |
| - intptr_t new_table_size = table_size * 2; |
| - Array& new_symbol_table = |
| - Array::Handle(Array::New(new_table_size + 1, Heap::kOld)); |
| - // Copy all elements from the original symbol table to the newly allocated |
| - // array. |
| - String& element = String::Handle(); |
| - dart::Object& new_element = Object::Handle(); |
| - for (intptr_t i = 0; i < table_size; i++) { |
| - element ^= symbol_table.At(i); |
| - if (!element.IsNull()) { |
| - intptr_t hash = element.Hash(); |
| - intptr_t index = hash % new_table_size; |
| - new_element = new_symbol_table.At(index); |
| - intptr_t num_collisions = 0; |
| - while (!new_element.IsNull()) { |
| - index = (index + 1) % new_table_size; // Move to next element. |
| - new_element = new_symbol_table.At(index); |
| - num_collisions += 1; |
| - } |
| - if (FLAG_dump_symbol_stats) { |
| - if (num_collisions >= kMaxCollisionBuckets) { |
| - num_collisions = (kMaxCollisionBuckets - 1); |
| - } |
| - collision_count_[num_collisions] += 1; |
| - } |
| - new_symbol_table.SetAt(index, element); |
| - } |
| - } |
| - // Copy used count. |
| - new_element = symbol_table.At(table_size); |
| - new_symbol_table.SetAt(new_table_size, new_element); |
| - // Remember the new symbol table now. |
| - Isolate::Current()->object_store()->set_symbol_table(new_symbol_table); |
| -} |
| - |
| - |
| -void Symbols::InsertIntoSymbolTable(const Array& symbol_table, |
| - const String& symbol, |
| - intptr_t index) { |
| - intptr_t table_size = symbol_table.Length() - 1; |
| - symbol.SetCanonical(); // Mark object as being canonical. |
| - symbol_table.SetAt(index, symbol); // Remember the new symbol. |
| - dart::Smi& used = Smi::Handle(); |
| - used ^= symbol_table.At(table_size); |
| - intptr_t used_elements = used.Value() + 1; // One more element added. |
| - used = Smi::New(used_elements); |
| - symbol_table.SetAt(table_size, used); // Update used count. |
| - |
| - // Rehash if symbol_table is 75% full. |
| - if (used_elements > ((table_size / 4) * 3)) { |
| - GrowSymbolTable(symbol_table); |
| - } |
| -} |
| - |
| - |
| -template<typename T> |
| -intptr_t Symbols::FindIndex(const Array& symbol_table, |
| - const T* characters, |
| - intptr_t len, |
| - intptr_t hash) { |
| - // Last element of the array is the number of used elements. |
| - intptr_t table_size = symbol_table.Length() - 1; |
| - intptr_t index = hash % table_size; |
| - intptr_t num_collisions = 0; |
| - |
| - String& symbol = String::Handle(); |
| - symbol ^= symbol_table.At(index); |
| - while (!symbol.IsNull() && !symbol.Equals(characters, len)) { |
| - index = (index + 1) % table_size; // Move to next element. |
| - symbol ^= symbol_table.At(index); |
| - num_collisions += 1; |
| - } |
| - if (FLAG_dump_symbol_stats) { |
| - if (num_collisions >= kMaxCollisionBuckets) { |
| - num_collisions = (kMaxCollisionBuckets - 1); |
| - } |
| - collision_count_[num_collisions] += 1; |
| - } |
| - return index; // Index of symbol if found or slot into which to add symbol. |
| -} |
| - |
| - |
| -template intptr_t Symbols::FindIndex(const Array& symbol_table, |
| - const uint8_t* characters, |
| - intptr_t len, |
| - intptr_t hash); |
| -template intptr_t Symbols::FindIndex(const Array& symbol_table, |
| - const uint16_t* characters, |
| - intptr_t len, |
| - intptr_t hash); |
| -template intptr_t Symbols::FindIndex(const Array& symbol_table, |
| - const int32_t* characters, |
| - intptr_t len, |
| - intptr_t hash); |
| - |
| - |
| -intptr_t Symbols::FindIndex(const Array& symbol_table, |
| - const String& str, |
| - intptr_t begin_index, |
| - intptr_t len, |
| - intptr_t hash) { |
| - // Last element of the array is the number of used elements. |
| - intptr_t table_size = symbol_table.Length() - 1; |
| - intptr_t index = hash % table_size; |
| - intptr_t num_collisions = 0; |
| - |
| - String& symbol = String::Handle(); |
| - symbol ^= symbol_table.At(index); |
| - while (!symbol.IsNull() && !symbol.Equals(str, begin_index, len)) { |
| - index = (index + 1) % table_size; // Move to next element. |
| - symbol ^= symbol_table.At(index); |
| - num_collisions += 1; |
| - } |
| - if (FLAG_dump_symbol_stats) { |
| - if (num_collisions >= kMaxCollisionBuckets) { |
| - num_collisions = (kMaxCollisionBuckets - 1); |
| - } |
| - collision_count_[num_collisions] += 1; |
| - } |
| - return index; // Index of symbol if found or slot into which to add symbol. |
| -} |
| - |
| - |
| intptr_t Symbols::LookupVMSymbol(RawObject* obj) { |
| for (intptr_t i = 1; i < Symbols::kMaxPredefinedId; i++) { |
| if (symbol_handles_[i]->raw() == obj) { |