Index: runtime/vm/symbols.cc |
=================================================================== |
--- runtime/vm/symbols.cc (revision 38342) |
+++ runtime/vm/symbols.cc (working copy) |
@@ -6,6 +6,7 @@ |
#include "vm/handles.h" |
#include "vm/handles_impl.h" |
+#include "vm/hash_table.h" |
#include "vm/isolate.h" |
#include "vm/object.h" |
#include "vm/object_store.h" |
@@ -35,9 +36,6 @@ |
#undef DEFINE_KEYWORD_SYMBOL_INDEX |
}; |
-intptr_t Symbols::num_of_grows_; |
-intptr_t Symbols::collision_count_[kMaxCollisionBuckets]; |
- |
DEFINE_FLAG(bool, dump_symbol_stats, false, "Dump symbol table statistics"); |
@@ -61,30 +59,17 @@ |
// Should only be run by the vm isolate. |
ASSERT(isolate == Dart::vm_isolate()); |
- if (FLAG_dump_symbol_stats) { |
- num_of_grows_ = 0; |
- for (intptr_t i = 0; i < kMaxCollisionBuckets; i++) { |
- collision_count_[i] = 0; |
- } |
- } |
- |
// Create and setup a symbol table in the vm isolate. |
SetupSymbolTable(isolate); |
// Create all predefined symbols. |
ASSERT((sizeof(names) / sizeof(const char*)) == Symbols::kNullCharId); |
- ObjectStore* object_store = isolate->object_store(); |
- Array& symbol_table = Array::Handle(); |
- |
// First set up all the predefined string symbols. |
for (intptr_t i = 1; i < Symbols::kKwTableStart; i++) { |
- // The symbol_table needs to be reloaded as it might have grown in the |
- // previous iteration. |
- symbol_table = object_store->symbol_table(); |
String* str = String::ReadOnlyHandle(); |
*str = OneByteString::New(names[i], Heap::kOld); |
- Add(symbol_table, *str); |
+ AddToVMIsolate(*str); |
symbol_handles_[i] = str; |
} |
Object::RegisterSingletonClassNames(); |
@@ -100,54 +85,156 @@ |
// Add Latin1 characters as Symbols, so that Symbols::FromCharCode is fast. |
for (intptr_t c = 0; c < kNumberOfOneCharCodeSymbols; c++) { |
- // The symbol_table needs to be reloaded as it might have grown in the |
- // previous iteration. |
- symbol_table = object_store->symbol_table(); |
intptr_t idx = (kNullCharId + c); |
ASSERT(idx < kMaxPredefinedId); |
ASSERT(Utf::IsLatin1(c)); |
uint8_t ch = static_cast<uint8_t>(c); |
String* str = String::ReadOnlyHandle(); |
*str = OneByteString::New(&ch, 1, Heap::kOld); |
- Add(symbol_table, *str); |
+ AddToVMIsolate(*str); |
predefined_[c] = str->raw(); |
symbol_handles_[idx] = str; |
} |
} |
+RawString* StringFrom(const uint8_t* data, intptr_t len, Heap::Space space) { |
+ return String::FromLatin1(data, len, space); |
+} |
+RawString* StringFrom(const uint16_t* data, intptr_t len, Heap::Space space) { |
+ return String::FromUTF16(data, len, space); |
+} |
+RawString* StringFrom(const int32_t* data, intptr_t len, Heap::Space space) { |
+ return String::FromUTF32(data, len, space); |
+} |
+ |
+ |
+template<typename CharType> |
+class CharArray { |
+ public: |
+ CharArray(const CharType* data, intptr_t len) |
+ : data_(data), len_(len) { |
+ hash_ = String::Hash(data, len); |
+ } |
+ RawString* ToSymbol() const { |
+ String& result = String::Handle(StringFrom(data_, len_, Heap::kOld)); |
+ result.SetCanonical(); |
+ result.SetHash(hash_); |
+ return result.raw(); |
+ } |
+ bool Equals(const String& other) const { |
+ return other.Equals(data_, len_); |
+ } |
+ intptr_t Hash() const { return hash_; } |
+ private: |
+ const CharType* data_; |
+ intptr_t len_; |
+ intptr_t hash_; |
+}; |
+typedef CharArray<uint8_t> Latin1Array; |
+typedef CharArray<uint16_t> UTF16Array; |
+typedef CharArray<int32_t> UTF32Array; |
+ |
+ |
+class StringSlice { |
+ public: |
+ StringSlice(const String* str, intptr_t begin_index, intptr_t length) |
Ivan Posva
2014/07/29 21:55:10
Why do you use "const String* str" instead of a re
koda
2014/07/29 22:31:50
A remnant from when this was just a struct. Change
|
+ : str_(str), begin_index_(begin_index), len_(length) { |
+ hash_ = is_all() ? str->Hash() : String::Hash(*str, begin_index, length); |
+ } |
+ RawString* ToSymbol() const; |
+ bool Equals(const String& other) const { |
+ return other.Equals(*str_, begin_index_, len_); |
+ } |
+ intptr_t Hash() const { return hash_; } |
+ private: |
+ bool is_all() const { return begin_index_ == 0 && len_ == str_->Length(); } |
+ const String* str_; |
+ intptr_t begin_index_; |
+ intptr_t len_; |
+ intptr_t hash_; |
+}; |
+ |
+ |
+RawString* StringSlice::ToSymbol() const { |
+ if (is_all() && str_->IsOld()) { |
+ str_->SetCanonical(); |
+ return str_->raw(); |
+ } else { |
+ String& result = String::Handle( |
+ String::SubString(*str_, begin_index_, len_, Heap::kOld)); |
+ result.SetCanonical(); |
+ result.SetHash(hash_); |
+ return result.raw(); |
+ } |
+} |
+ |
+ |
+class SymbolTraits { |
+ public: |
+ static bool IsMatch(const Object& a, const Object& b) { |
+ return String::Cast(a).Equals(String::Cast(b)); |
+ } |
+ template<typename CharType> |
Ivan Posva
2014/07/23 12:09:26
As below I would like to discuss this in person as
koda
2014/07/29 22:31:50
Acknowledged.
|
+ static bool IsMatch(const CharArray<CharType>& array, const Object& obj) { |
+ return array.Equals(String::Cast(obj)); |
+ } |
+ static bool IsMatch(const StringSlice& slice, const Object& obj) { |
+ return slice.Equals(String::Cast(obj)); |
+ } |
+ static uword Hash(const Object& key) { |
+ return String::Cast(key).Hash(); |
+ } |
+ template<typename CharType> |
+ static uword Hash(const CharArray<CharType>& array) { |
+ return array.Hash(); |
+ } |
+ static uword Hash(const StringSlice& slice) { |
+ return slice.Hash(); |
+ } |
+ template<typename CharType> |
+ static RawObject* NewKey(const CharArray<CharType>& array) { |
+ return array.ToSymbol(); |
+ } |
+ static RawObject* NewKey(const StringSlice& slice) { |
+ return slice.ToSymbol(); |
+ } |
+}; |
+typedef UnorderedHashSet<SymbolTraits> SymbolTable; |
+ |
+ |
void Symbols::SetupSymbolTable(Isolate* isolate) { |
ASSERT(isolate != NULL); |
// Setup the symbol table used within the String class. |
const intptr_t initial_size = (isolate == Dart::vm_isolate()) ? |
kInitialVMIsolateSymtabSize : kInitialSymtabSize; |
- const Array& array = Array::Handle(Array::New(initial_size + 1, Heap::kOld)); |
- |
- // Last element contains the count of used slots. |
- array.SetAt(initial_size, Smi::Handle(Smi::New(0))); |
+ Array& array = |
+ Array::Handle(HashTables::New<SymbolTable>(initial_size, Heap::kOld)); |
isolate->object_store()->set_symbol_table(array); |
} |
-intptr_t Symbols::Size(Isolate* isolate) { |
+void Symbols::GetStats(Isolate* isolate, intptr_t* size, intptr_t* capacity) { |
ASSERT(isolate != NULL); |
- Array& symbol_table = Array::Handle(isolate, |
- isolate->object_store()->symbol_table()); |
- intptr_t table_size_index = symbol_table.Length() - 1; |
- dart::Smi& used = Smi::Handle(); |
- used ^= symbol_table.At(table_size_index); |
- return used.Value(); |
+ SymbolTable table(Array::Handle(isolate->object_store()->symbol_table())); |
+ *size = table.NumOccupied(); |
+ *capacity = table.NumEntries(); |
+ table.Release(); |
} |
-void Symbols::Add(const Array& symbol_table, const String& str) { |
+void Symbols::AddToVMIsolate(const String& str) { |
// Should only be run by the vm isolate. |
ASSERT(Isolate::Current() == Dart::vm_isolate()); |
- intptr_t hash = str.Hash(); |
- intptr_t index = FindIndex(symbol_table, str, 0, str.Length(), hash); |
- ASSERT(symbol_table.At(index) == String::null()); |
- InsertIntoSymbolTable(symbol_table, str, index); |
+ Isolate* isolate = Dart::vm_isolate(); |
+ Array& array = Array::Handle(isolate->object_store()->symbol_table()); |
+ SymbolTable table(array); |
+ bool present = table.Insert(str); |
+ str.SetCanonical(); |
+ ASSERT(!present); |
+ isolate->object_store()->set_symbol_table(array); |
Ivan Posva
2014/07/23 12:09:26
I find this pattern really awkward. Reading just t
koda
2014/07/29 22:31:50
Acknowledged.
|
+ table.Release(); |
} |
@@ -179,71 +266,43 @@ |
RawString* Symbols::FromLatin1(const uint8_t* latin1_array, intptr_t len) { |
- return NewSymbol(latin1_array, len, String::FromLatin1); |
+ return NewSymbol(Latin1Array(latin1_array, len)); |
} |
RawString* Symbols::FromUTF16(const uint16_t* utf16_array, intptr_t len) { |
- return NewSymbol(utf16_array, len, String::FromUTF16); |
+ return NewSymbol(UTF16Array(utf16_array, len)); |
} |
RawString* Symbols::FromUTF32(const int32_t* utf32_array, intptr_t len) { |
- return NewSymbol(utf32_array, len, String::FromUTF32); |
+ return NewSymbol(UTF32Array(utf32_array, len)); |
} |
-template<typename CharacterType, typename CallbackType> |
-RawString* Symbols::NewSymbol(const CharacterType* characters, |
- intptr_t len, |
- CallbackType new_string) { |
- Isolate* isolate = Isolate::Current(); |
- String& symbol = String::Handle(isolate, String::null()); |
- Array& symbol_table = Array::Handle(isolate, Array::null()); |
- |
- // Calculate the String hash for this sequence of characters. |
- intptr_t hash = String::Hash(characters, len); |
- |
- // First check if a symbol exists in the vm isolate for these characters. |
- symbol_table = Dart::vm_isolate()->object_store()->symbol_table(); |
- intptr_t index = FindIndex(symbol_table, characters, len, hash); |
- symbol ^= symbol_table.At(index); |
+template<typename StringType> |
Ivan Posva
2014/07/29 21:55:10
Please add a comment enumerating all possible Stri
koda
2014/07/29 22:31:49
Done.
|
+RawString* Symbols::NewSymbol(const StringType& str) { |
+ String& symbol = String::Handle(); |
+ { |
+ Isolate* isolate = Dart::vm_isolate(); |
+ Array& array = Array::Handle(isolate->object_store()->symbol_table()); |
+ SymbolTable table(array); |
+ symbol ^= table.GetOrNull(str); |
+ table.Release(); |
+ } |
if (symbol.IsNull()) { |
- // Now try in the symbol table of the current isolate. |
- symbol_table = isolate->object_store()->symbol_table(); |
- index = FindIndex(symbol_table, characters, len, hash); |
- // Since we leave enough room in the table to guarantee, that we find an |
- // empty spot, index is the insertion point if symbol is null. |
- symbol ^= symbol_table.At(index); |
- if (symbol.IsNull()) { |
- // Allocate new result string. |
- symbol = (*new_string)(characters, len, Heap::kOld); |
- symbol.SetHash(hash); // Remember the calculated hash value. |
- InsertIntoSymbolTable(symbol_table, symbol, index); |
- } |
+ Isolate* isolate = Isolate::Current(); |
+ Array& array = Array::Handle(isolate->object_store()->symbol_table()); |
+ SymbolTable table(array); |
+ symbol ^= table.InsertNewOrGet(str); |
+ isolate->object_store()->set_symbol_table(array); |
+ table.Release(); |
} |
ASSERT(symbol.IsSymbol()); |
return symbol.raw(); |
} |
-template RawString* Symbols::NewSymbol(const uint8_t* characters, |
- intptr_t len, |
- RawString* (*new_string)(const uint8_t*, |
- intptr_t, |
- Heap::Space)); |
-template RawString* Symbols::NewSymbol(const uint16_t* characters, |
- intptr_t len, |
- RawString* (*new_string)(const uint16_t*, |
- intptr_t, |
- Heap::Space)); |
-template RawString* Symbols::NewSymbol(const int32_t* characters, |
- intptr_t len, |
- RawString* (*new_string)(const int32_t*, |
- intptr_t, |
- Heap::Space)); |
- |
- |
RawString* Symbols::New(const String& str) { |
if (str.IsSymbol()) { |
return str.raw(); |
@@ -253,43 +312,7 @@ |
RawString* Symbols::New(const String& str, intptr_t begin_index, intptr_t len) { |
- ASSERT(begin_index >= 0); |
- ASSERT(len >= 0); |
- ASSERT((begin_index + len) <= str.Length()); |
- Isolate* isolate = Isolate::Current(); |
- ASSERT(isolate != Dart::vm_isolate()); |
- String& symbol = String::Handle(isolate, String::null()); |
- Array& symbol_table = Array::Handle(isolate, Array::null()); |
- |
- // Calculate the String hash for this sequence of characters. |
- intptr_t hash = (begin_index == 0 && len == str.Length()) ? str.Hash() : |
- String::Hash(str, begin_index, len); |
- |
- // First check if a symbol exists in the vm isolate for these characters. |
- symbol_table = Dart::vm_isolate()->object_store()->symbol_table(); |
- intptr_t index = FindIndex(symbol_table, str, begin_index, len, hash); |
- symbol ^= symbol_table.At(index); |
- if (symbol.IsNull()) { |
- // Now try in the symbol table of the current isolate. |
- symbol_table = isolate->object_store()->symbol_table(); |
- index = FindIndex(symbol_table, str, begin_index, len, hash); |
- // Since we leave enough room in the table to guarantee, that we find an |
- // empty spot, index is the insertion point if symbol is null. |
- symbol ^= symbol_table.At(index); |
- if (symbol.IsNull()) { |
- if (str.IsOld() && begin_index == 0 && len == str.Length()) { |
- // Reuse the incoming str as the symbol value. |
- symbol = str.raw(); |
- } else { |
- // Allocate a copy in old space. |
- symbol = String::SubString(str, begin_index, len, Heap::kOld); |
- symbol.SetHash(hash); |
- } |
- InsertIntoSymbolTable(symbol_table, symbol, index); |
- } |
- } |
- ASSERT(symbol.IsSymbol()); |
- return symbol.raw(); |
+ return NewSymbol(StringSlice(&str, begin_index, len)); |
} |
@@ -303,163 +326,22 @@ |
void Symbols::DumpStats() { |
if (FLAG_dump_symbol_stats) { |
- intptr_t table_size = 0; |
- dart::Smi& used = Smi::Handle(); |
- Array& symbol_table = Array::Handle(Array::null()); |
- |
+ intptr_t size = -1; |
+ intptr_t capacity = -1; |
// First dump VM symbol table stats. |
- symbol_table = Dart::vm_isolate()->object_store()->symbol_table(); |
- table_size = symbol_table.Length() - 1; |
- used ^= symbol_table.At(table_size); |
- OS::Print("VM Isolate: Number of symbols : %" Pd "\n", used.Value()); |
- OS::Print("VM Isolate: Symbol table capacity : %" Pd "\n", table_size); |
- |
+ GetStats(Dart::vm_isolate(), &size, &capacity); |
+ OS::Print("VM Isolate: Number of symbols : %" Pd "\n", size); |
+ OS::Print("VM Isolate: Symbol table capacity : %" Pd "\n", capacity); |
// Now dump regular isolate symbol table stats. |
- symbol_table = Isolate::Current()->object_store()->symbol_table(); |
- table_size = symbol_table.Length() - 1; |
- used ^= symbol_table.At(table_size); |
- OS::Print("Isolate: Number of symbols : %" Pd "\n", used.Value()); |
- OS::Print("Isolate: Symbol table capacity : %" Pd "\n", table_size); |
- |
- // Dump overall collision and growth counts. |
- OS::Print("Number of symbol table grows = %" Pd "\n", num_of_grows_); |
- OS::Print("Collision counts on add and lookup :\n"); |
- intptr_t i = 0; |
- for (i = 0; i < (kMaxCollisionBuckets - 1); i++) { |
- OS::Print(" %" Pd " collisions => %" Pd "\n", i, collision_count_[i]); |
- } |
- OS::Print(" > %" Pd " collisions => %" Pd "\n", i, collision_count_[i]); |
+ GetStats(Isolate::Current(), &size, &capacity); |
+ OS::Print("Isolate: Number of symbols : %" Pd "\n", size); |
+ OS::Print("Isolate: Symbol table capacity : %" Pd "\n", capacity); |
+ // TODO(koda): Consider recording growth and collision stats in HashTable, |
+ // in DEBUG mode. |
} |
} |
-void Symbols::GrowSymbolTable(const Array& symbol_table) { |
- // TODO(iposva): Avoid exponential growth. |
- num_of_grows_ += 1; |
- intptr_t table_size = symbol_table.Length() - 1; |
- intptr_t new_table_size = table_size * 2; |
- Array& new_symbol_table = |
- Array::Handle(Array::New(new_table_size + 1, Heap::kOld)); |
- // Copy all elements from the original symbol table to the newly allocated |
- // array. |
- String& element = String::Handle(); |
- dart::Object& new_element = Object::Handle(); |
- for (intptr_t i = 0; i < table_size; i++) { |
- element ^= symbol_table.At(i); |
- if (!element.IsNull()) { |
- intptr_t hash = element.Hash(); |
- intptr_t index = hash % new_table_size; |
- new_element = new_symbol_table.At(index); |
- intptr_t num_collisions = 0; |
- while (!new_element.IsNull()) { |
- index = (index + 1) % new_table_size; // Move to next element. |
- new_element = new_symbol_table.At(index); |
- num_collisions += 1; |
- } |
- if (FLAG_dump_symbol_stats) { |
- if (num_collisions >= kMaxCollisionBuckets) { |
- num_collisions = (kMaxCollisionBuckets - 1); |
- } |
- collision_count_[num_collisions] += 1; |
- } |
- new_symbol_table.SetAt(index, element); |
- } |
- } |
- // Copy used count. |
- new_element = symbol_table.At(table_size); |
- new_symbol_table.SetAt(new_table_size, new_element); |
- // Remember the new symbol table now. |
- Isolate::Current()->object_store()->set_symbol_table(new_symbol_table); |
-} |
- |
- |
-void Symbols::InsertIntoSymbolTable(const Array& symbol_table, |
- const String& symbol, |
- intptr_t index) { |
- intptr_t table_size = symbol_table.Length() - 1; |
- symbol.SetCanonical(); // Mark object as being canonical. |
- symbol_table.SetAt(index, symbol); // Remember the new symbol. |
- dart::Smi& used = Smi::Handle(); |
- used ^= symbol_table.At(table_size); |
- intptr_t used_elements = used.Value() + 1; // One more element added. |
- used = Smi::New(used_elements); |
- symbol_table.SetAt(table_size, used); // Update used count. |
- |
- // Rehash if symbol_table is 75% full. |
- if (used_elements > ((table_size / 4) * 3)) { |
- GrowSymbolTable(symbol_table); |
- } |
-} |
- |
- |
-template<typename T> |
-intptr_t Symbols::FindIndex(const Array& symbol_table, |
- const T* characters, |
- intptr_t len, |
- intptr_t hash) { |
- // Last element of the array is the number of used elements. |
- intptr_t table_size = symbol_table.Length() - 1; |
- intptr_t index = hash % table_size; |
- intptr_t num_collisions = 0; |
- |
- String& symbol = String::Handle(); |
- symbol ^= symbol_table.At(index); |
- while (!symbol.IsNull() && !symbol.Equals(characters, len)) { |
- index = (index + 1) % table_size; // Move to next element. |
- symbol ^= symbol_table.At(index); |
- num_collisions += 1; |
- } |
- if (FLAG_dump_symbol_stats) { |
- if (num_collisions >= kMaxCollisionBuckets) { |
- num_collisions = (kMaxCollisionBuckets - 1); |
- } |
- collision_count_[num_collisions] += 1; |
- } |
- return index; // Index of symbol if found or slot into which to add symbol. |
-} |
- |
- |
-template intptr_t Symbols::FindIndex(const Array& symbol_table, |
- const uint8_t* characters, |
- intptr_t len, |
- intptr_t hash); |
-template intptr_t Symbols::FindIndex(const Array& symbol_table, |
- const uint16_t* characters, |
- intptr_t len, |
- intptr_t hash); |
-template intptr_t Symbols::FindIndex(const Array& symbol_table, |
- const int32_t* characters, |
- intptr_t len, |
- intptr_t hash); |
- |
- |
-intptr_t Symbols::FindIndex(const Array& symbol_table, |
- const String& str, |
- intptr_t begin_index, |
- intptr_t len, |
- intptr_t hash) { |
- // Last element of the array is the number of used elements. |
- intptr_t table_size = symbol_table.Length() - 1; |
- intptr_t index = hash % table_size; |
- intptr_t num_collisions = 0; |
- |
- String& symbol = String::Handle(); |
- symbol ^= symbol_table.At(index); |
- while (!symbol.IsNull() && !symbol.Equals(str, begin_index, len)) { |
- index = (index + 1) % table_size; // Move to next element. |
- symbol ^= symbol_table.At(index); |
- num_collisions += 1; |
- } |
- if (FLAG_dump_symbol_stats) { |
- if (num_collisions >= kMaxCollisionBuckets) { |
- num_collisions = (kMaxCollisionBuckets - 1); |
- } |
- collision_count_[num_collisions] += 1; |
- } |
- return index; // Index of symbol if found or slot into which to add symbol. |
-} |
- |
- |
intptr_t Symbols::LookupVMSymbol(RawObject* obj) { |
for (intptr_t i = 1; i < Symbols::kMaxPredefinedId; i++) { |
if (symbol_handles_[i]->raw() == obj) { |