Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(834)

Unified Diff: runtime/vm/symbols.cc

Issue 397413002: Reimplement Symbols using hash table template. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « runtime/vm/symbols.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: runtime/vm/symbols.cc
===================================================================
--- runtime/vm/symbols.cc (revision 38342)
+++ runtime/vm/symbols.cc (working copy)
@@ -6,6 +6,7 @@
#include "vm/handles.h"
#include "vm/handles_impl.h"
+#include "vm/hash_table.h"
#include "vm/isolate.h"
#include "vm/object.h"
#include "vm/object_store.h"
@@ -35,9 +36,6 @@
#undef DEFINE_KEYWORD_SYMBOL_INDEX
};
-intptr_t Symbols::num_of_grows_;
-intptr_t Symbols::collision_count_[kMaxCollisionBuckets];
-
DEFINE_FLAG(bool, dump_symbol_stats, false, "Dump symbol table statistics");
@@ -61,30 +59,17 @@
// Should only be run by the vm isolate.
ASSERT(isolate == Dart::vm_isolate());
- if (FLAG_dump_symbol_stats) {
- num_of_grows_ = 0;
- for (intptr_t i = 0; i < kMaxCollisionBuckets; i++) {
- collision_count_[i] = 0;
- }
- }
-
// Create and setup a symbol table in the vm isolate.
SetupSymbolTable(isolate);
// Create all predefined symbols.
ASSERT((sizeof(names) / sizeof(const char*)) == Symbols::kNullCharId);
- ObjectStore* object_store = isolate->object_store();
- Array& symbol_table = Array::Handle();
-
// First set up all the predefined string symbols.
for (intptr_t i = 1; i < Symbols::kKwTableStart; i++) {
- // The symbol_table needs to be reloaded as it might have grown in the
- // previous iteration.
- symbol_table = object_store->symbol_table();
String* str = String::ReadOnlyHandle();
*str = OneByteString::New(names[i], Heap::kOld);
- Add(symbol_table, *str);
+ AddToVMIsolate(*str);
symbol_handles_[i] = str;
}
Object::RegisterSingletonClassNames();
@@ -100,54 +85,156 @@
// Add Latin1 characters as Symbols, so that Symbols::FromCharCode is fast.
for (intptr_t c = 0; c < kNumberOfOneCharCodeSymbols; c++) {
- // The symbol_table needs to be reloaded as it might have grown in the
- // previous iteration.
- symbol_table = object_store->symbol_table();
intptr_t idx = (kNullCharId + c);
ASSERT(idx < kMaxPredefinedId);
ASSERT(Utf::IsLatin1(c));
uint8_t ch = static_cast<uint8_t>(c);
String* str = String::ReadOnlyHandle();
*str = OneByteString::New(&ch, 1, Heap::kOld);
- Add(symbol_table, *str);
+ AddToVMIsolate(*str);
predefined_[c] = str->raw();
symbol_handles_[idx] = str;
}
}
+RawString* StringFrom(const uint8_t* data, intptr_t len, Heap::Space space) {
+ return String::FromLatin1(data, len, space);
+}
+RawString* StringFrom(const uint16_t* data, intptr_t len, Heap::Space space) {
+ return String::FromUTF16(data, len, space);
+}
+RawString* StringFrom(const int32_t* data, intptr_t len, Heap::Space space) {
+ return String::FromUTF32(data, len, space);
+}
+
+
+template<typename CharType>
+class CharArray {
+ public:
+ CharArray(const CharType* data, intptr_t len)
+ : data_(data), len_(len) {
+ hash_ = String::Hash(data, len);
+ }
+ RawString* ToSymbol() const {
+ String& result = String::Handle(StringFrom(data_, len_, Heap::kOld));
+ result.SetCanonical();
+ result.SetHash(hash_);
+ return result.raw();
+ }
+ bool Equals(const String& other) const {
+ return other.Equals(data_, len_);
+ }
+ intptr_t Hash() const { return hash_; }
+ private:
+ const CharType* data_;
+ intptr_t len_;
+ intptr_t hash_;
+};
+typedef CharArray<uint8_t> Latin1Array;
+typedef CharArray<uint16_t> UTF16Array;
+typedef CharArray<int32_t> UTF32Array;
+
+
+class StringSlice {
+ public:
+ StringSlice(const String* str, intptr_t begin_index, intptr_t length)
Ivan Posva 2014/07/29 21:55:10 Why do you use "const String* str" instead of a re
koda 2014/07/29 22:31:50 A remnant from when this was just a struct. Change
+ : str_(str), begin_index_(begin_index), len_(length) {
+ hash_ = is_all() ? str->Hash() : String::Hash(*str, begin_index, length);
+ }
+ RawString* ToSymbol() const;
+ bool Equals(const String& other) const {
+ return other.Equals(*str_, begin_index_, len_);
+ }
+ intptr_t Hash() const { return hash_; }
+ private:
+ bool is_all() const { return begin_index_ == 0 && len_ == str_->Length(); }
+ const String* str_;
+ intptr_t begin_index_;
+ intptr_t len_;
+ intptr_t hash_;
+};
+
+
+RawString* StringSlice::ToSymbol() const {
+ if (is_all() && str_->IsOld()) {
+ str_->SetCanonical();
+ return str_->raw();
+ } else {
+ String& result = String::Handle(
+ String::SubString(*str_, begin_index_, len_, Heap::kOld));
+ result.SetCanonical();
+ result.SetHash(hash_);
+ return result.raw();
+ }
+}
+
+
+class SymbolTraits {
+ public:
+ static bool IsMatch(const Object& a, const Object& b) {
+ return String::Cast(a).Equals(String::Cast(b));
+ }
+ template<typename CharType>
Ivan Posva 2014/07/23 12:09:26 As below I would like to discuss this in person as
koda 2014/07/29 22:31:50 Acknowledged.
+ static bool IsMatch(const CharArray<CharType>& array, const Object& obj) {
+ return array.Equals(String::Cast(obj));
+ }
+ static bool IsMatch(const StringSlice& slice, const Object& obj) {
+ return slice.Equals(String::Cast(obj));
+ }
+ static uword Hash(const Object& key) {
+ return String::Cast(key).Hash();
+ }
+ template<typename CharType>
+ static uword Hash(const CharArray<CharType>& array) {
+ return array.Hash();
+ }
+ static uword Hash(const StringSlice& slice) {
+ return slice.Hash();
+ }
+ template<typename CharType>
+ static RawObject* NewKey(const CharArray<CharType>& array) {
+ return array.ToSymbol();
+ }
+ static RawObject* NewKey(const StringSlice& slice) {
+ return slice.ToSymbol();
+ }
+};
+typedef UnorderedHashSet<SymbolTraits> SymbolTable;
+
+
void Symbols::SetupSymbolTable(Isolate* isolate) {
ASSERT(isolate != NULL);
// Setup the symbol table used within the String class.
const intptr_t initial_size = (isolate == Dart::vm_isolate()) ?
kInitialVMIsolateSymtabSize : kInitialSymtabSize;
- const Array& array = Array::Handle(Array::New(initial_size + 1, Heap::kOld));
-
- // Last element contains the count of used slots.
- array.SetAt(initial_size, Smi::Handle(Smi::New(0)));
+ Array& array =
+ Array::Handle(HashTables::New<SymbolTable>(initial_size, Heap::kOld));
isolate->object_store()->set_symbol_table(array);
}
-intptr_t Symbols::Size(Isolate* isolate) {
+void Symbols::GetStats(Isolate* isolate, intptr_t* size, intptr_t* capacity) {
ASSERT(isolate != NULL);
- Array& symbol_table = Array::Handle(isolate,
- isolate->object_store()->symbol_table());
- intptr_t table_size_index = symbol_table.Length() - 1;
- dart::Smi& used = Smi::Handle();
- used ^= symbol_table.At(table_size_index);
- return used.Value();
+ SymbolTable table(Array::Handle(isolate->object_store()->symbol_table()));
+ *size = table.NumOccupied();
+ *capacity = table.NumEntries();
+ table.Release();
}
-void Symbols::Add(const Array& symbol_table, const String& str) {
+void Symbols::AddToVMIsolate(const String& str) {
// Should only be run by the vm isolate.
ASSERT(Isolate::Current() == Dart::vm_isolate());
- intptr_t hash = str.Hash();
- intptr_t index = FindIndex(symbol_table, str, 0, str.Length(), hash);
- ASSERT(symbol_table.At(index) == String::null());
- InsertIntoSymbolTable(symbol_table, str, index);
+ Isolate* isolate = Dart::vm_isolate();
+ Array& array = Array::Handle(isolate->object_store()->symbol_table());
+ SymbolTable table(array);
+ bool present = table.Insert(str);
+ str.SetCanonical();
+ ASSERT(!present);
+ isolate->object_store()->set_symbol_table(array);
Ivan Posva 2014/07/23 12:09:26 I find this pattern really awkward. Reading just t
koda 2014/07/29 22:31:50 Acknowledged.
+ table.Release();
}
@@ -179,71 +266,43 @@
RawString* Symbols::FromLatin1(const uint8_t* latin1_array, intptr_t len) {
- return NewSymbol(latin1_array, len, String::FromLatin1);
+ return NewSymbol(Latin1Array(latin1_array, len));
}
RawString* Symbols::FromUTF16(const uint16_t* utf16_array, intptr_t len) {
- return NewSymbol(utf16_array, len, String::FromUTF16);
+ return NewSymbol(UTF16Array(utf16_array, len));
}
RawString* Symbols::FromUTF32(const int32_t* utf32_array, intptr_t len) {
- return NewSymbol(utf32_array, len, String::FromUTF32);
+ return NewSymbol(UTF32Array(utf32_array, len));
}
-template<typename CharacterType, typename CallbackType>
-RawString* Symbols::NewSymbol(const CharacterType* characters,
- intptr_t len,
- CallbackType new_string) {
- Isolate* isolate = Isolate::Current();
- String& symbol = String::Handle(isolate, String::null());
- Array& symbol_table = Array::Handle(isolate, Array::null());
-
- // Calculate the String hash for this sequence of characters.
- intptr_t hash = String::Hash(characters, len);
-
- // First check if a symbol exists in the vm isolate for these characters.
- symbol_table = Dart::vm_isolate()->object_store()->symbol_table();
- intptr_t index = FindIndex(symbol_table, characters, len, hash);
- symbol ^= symbol_table.At(index);
+template<typename StringType>
Ivan Posva 2014/07/29 21:55:10 Please add a comment enumerating all possible Stri
koda 2014/07/29 22:31:49 Done.
+RawString* Symbols::NewSymbol(const StringType& str) {
+ String& symbol = String::Handle();
+ {
+ Isolate* isolate = Dart::vm_isolate();
+ Array& array = Array::Handle(isolate->object_store()->symbol_table());
+ SymbolTable table(array);
+ symbol ^= table.GetOrNull(str);
+ table.Release();
+ }
if (symbol.IsNull()) {
- // Now try in the symbol table of the current isolate.
- symbol_table = isolate->object_store()->symbol_table();
- index = FindIndex(symbol_table, characters, len, hash);
- // Since we leave enough room in the table to guarantee, that we find an
- // empty spot, index is the insertion point if symbol is null.
- symbol ^= symbol_table.At(index);
- if (symbol.IsNull()) {
- // Allocate new result string.
- symbol = (*new_string)(characters, len, Heap::kOld);
- symbol.SetHash(hash); // Remember the calculated hash value.
- InsertIntoSymbolTable(symbol_table, symbol, index);
- }
+ Isolate* isolate = Isolate::Current();
+ Array& array = Array::Handle(isolate->object_store()->symbol_table());
+ SymbolTable table(array);
+ symbol ^= table.InsertNewOrGet(str);
+ isolate->object_store()->set_symbol_table(array);
+ table.Release();
}
ASSERT(symbol.IsSymbol());
return symbol.raw();
}
-template RawString* Symbols::NewSymbol(const uint8_t* characters,
- intptr_t len,
- RawString* (*new_string)(const uint8_t*,
- intptr_t,
- Heap::Space));
-template RawString* Symbols::NewSymbol(const uint16_t* characters,
- intptr_t len,
- RawString* (*new_string)(const uint16_t*,
- intptr_t,
- Heap::Space));
-template RawString* Symbols::NewSymbol(const int32_t* characters,
- intptr_t len,
- RawString* (*new_string)(const int32_t*,
- intptr_t,
- Heap::Space));
-
-
RawString* Symbols::New(const String& str) {
if (str.IsSymbol()) {
return str.raw();
@@ -253,43 +312,7 @@
RawString* Symbols::New(const String& str, intptr_t begin_index, intptr_t len) {
- ASSERT(begin_index >= 0);
- ASSERT(len >= 0);
- ASSERT((begin_index + len) <= str.Length());
- Isolate* isolate = Isolate::Current();
- ASSERT(isolate != Dart::vm_isolate());
- String& symbol = String::Handle(isolate, String::null());
- Array& symbol_table = Array::Handle(isolate, Array::null());
-
- // Calculate the String hash for this sequence of characters.
- intptr_t hash = (begin_index == 0 && len == str.Length()) ? str.Hash() :
- String::Hash(str, begin_index, len);
-
- // First check if a symbol exists in the vm isolate for these characters.
- symbol_table = Dart::vm_isolate()->object_store()->symbol_table();
- intptr_t index = FindIndex(symbol_table, str, begin_index, len, hash);
- symbol ^= symbol_table.At(index);
- if (symbol.IsNull()) {
- // Now try in the symbol table of the current isolate.
- symbol_table = isolate->object_store()->symbol_table();
- index = FindIndex(symbol_table, str, begin_index, len, hash);
- // Since we leave enough room in the table to guarantee, that we find an
- // empty spot, index is the insertion point if symbol is null.
- symbol ^= symbol_table.At(index);
- if (symbol.IsNull()) {
- if (str.IsOld() && begin_index == 0 && len == str.Length()) {
- // Reuse the incoming str as the symbol value.
- symbol = str.raw();
- } else {
- // Allocate a copy in old space.
- symbol = String::SubString(str, begin_index, len, Heap::kOld);
- symbol.SetHash(hash);
- }
- InsertIntoSymbolTable(symbol_table, symbol, index);
- }
- }
- ASSERT(symbol.IsSymbol());
- return symbol.raw();
+ return NewSymbol(StringSlice(&str, begin_index, len));
}
@@ -303,163 +326,22 @@
void Symbols::DumpStats() {
if (FLAG_dump_symbol_stats) {
- intptr_t table_size = 0;
- dart::Smi& used = Smi::Handle();
- Array& symbol_table = Array::Handle(Array::null());
-
+ intptr_t size = -1;
+ intptr_t capacity = -1;
// First dump VM symbol table stats.
- symbol_table = Dart::vm_isolate()->object_store()->symbol_table();
- table_size = symbol_table.Length() - 1;
- used ^= symbol_table.At(table_size);
- OS::Print("VM Isolate: Number of symbols : %" Pd "\n", used.Value());
- OS::Print("VM Isolate: Symbol table capacity : %" Pd "\n", table_size);
-
+ GetStats(Dart::vm_isolate(), &size, &capacity);
+ OS::Print("VM Isolate: Number of symbols : %" Pd "\n", size);
+ OS::Print("VM Isolate: Symbol table capacity : %" Pd "\n", capacity);
// Now dump regular isolate symbol table stats.
- symbol_table = Isolate::Current()->object_store()->symbol_table();
- table_size = symbol_table.Length() - 1;
- used ^= symbol_table.At(table_size);
- OS::Print("Isolate: Number of symbols : %" Pd "\n", used.Value());
- OS::Print("Isolate: Symbol table capacity : %" Pd "\n", table_size);
-
- // Dump overall collision and growth counts.
- OS::Print("Number of symbol table grows = %" Pd "\n", num_of_grows_);
- OS::Print("Collision counts on add and lookup :\n");
- intptr_t i = 0;
- for (i = 0; i < (kMaxCollisionBuckets - 1); i++) {
- OS::Print(" %" Pd " collisions => %" Pd "\n", i, collision_count_[i]);
- }
- OS::Print(" > %" Pd " collisions => %" Pd "\n", i, collision_count_[i]);
+ GetStats(Isolate::Current(), &size, &capacity);
+ OS::Print("Isolate: Number of symbols : %" Pd "\n", size);
+ OS::Print("Isolate: Symbol table capacity : %" Pd "\n", capacity);
+ // TODO(koda): Consider recording growth and collision stats in HashTable,
+ // in DEBUG mode.
}
}
-void Symbols::GrowSymbolTable(const Array& symbol_table) {
- // TODO(iposva): Avoid exponential growth.
- num_of_grows_ += 1;
- intptr_t table_size = symbol_table.Length() - 1;
- intptr_t new_table_size = table_size * 2;
- Array& new_symbol_table =
- Array::Handle(Array::New(new_table_size + 1, Heap::kOld));
- // Copy all elements from the original symbol table to the newly allocated
- // array.
- String& element = String::Handle();
- dart::Object& new_element = Object::Handle();
- for (intptr_t i = 0; i < table_size; i++) {
- element ^= symbol_table.At(i);
- if (!element.IsNull()) {
- intptr_t hash = element.Hash();
- intptr_t index = hash % new_table_size;
- new_element = new_symbol_table.At(index);
- intptr_t num_collisions = 0;
- while (!new_element.IsNull()) {
- index = (index + 1) % new_table_size; // Move to next element.
- new_element = new_symbol_table.At(index);
- num_collisions += 1;
- }
- if (FLAG_dump_symbol_stats) {
- if (num_collisions >= kMaxCollisionBuckets) {
- num_collisions = (kMaxCollisionBuckets - 1);
- }
- collision_count_[num_collisions] += 1;
- }
- new_symbol_table.SetAt(index, element);
- }
- }
- // Copy used count.
- new_element = symbol_table.At(table_size);
- new_symbol_table.SetAt(new_table_size, new_element);
- // Remember the new symbol table now.
- Isolate::Current()->object_store()->set_symbol_table(new_symbol_table);
-}
-
-
-void Symbols::InsertIntoSymbolTable(const Array& symbol_table,
- const String& symbol,
- intptr_t index) {
- intptr_t table_size = symbol_table.Length() - 1;
- symbol.SetCanonical(); // Mark object as being canonical.
- symbol_table.SetAt(index, symbol); // Remember the new symbol.
- dart::Smi& used = Smi::Handle();
- used ^= symbol_table.At(table_size);
- intptr_t used_elements = used.Value() + 1; // One more element added.
- used = Smi::New(used_elements);
- symbol_table.SetAt(table_size, used); // Update used count.
-
- // Rehash if symbol_table is 75% full.
- if (used_elements > ((table_size / 4) * 3)) {
- GrowSymbolTable(symbol_table);
- }
-}
-
-
-template<typename T>
-intptr_t Symbols::FindIndex(const Array& symbol_table,
- const T* characters,
- intptr_t len,
- intptr_t hash) {
- // Last element of the array is the number of used elements.
- intptr_t table_size = symbol_table.Length() - 1;
- intptr_t index = hash % table_size;
- intptr_t num_collisions = 0;
-
- String& symbol = String::Handle();
- symbol ^= symbol_table.At(index);
- while (!symbol.IsNull() && !symbol.Equals(characters, len)) {
- index = (index + 1) % table_size; // Move to next element.
- symbol ^= symbol_table.At(index);
- num_collisions += 1;
- }
- if (FLAG_dump_symbol_stats) {
- if (num_collisions >= kMaxCollisionBuckets) {
- num_collisions = (kMaxCollisionBuckets - 1);
- }
- collision_count_[num_collisions] += 1;
- }
- return index; // Index of symbol if found or slot into which to add symbol.
-}
-
-
-template intptr_t Symbols::FindIndex(const Array& symbol_table,
- const uint8_t* characters,
- intptr_t len,
- intptr_t hash);
-template intptr_t Symbols::FindIndex(const Array& symbol_table,
- const uint16_t* characters,
- intptr_t len,
- intptr_t hash);
-template intptr_t Symbols::FindIndex(const Array& symbol_table,
- const int32_t* characters,
- intptr_t len,
- intptr_t hash);
-
-
-intptr_t Symbols::FindIndex(const Array& symbol_table,
- const String& str,
- intptr_t begin_index,
- intptr_t len,
- intptr_t hash) {
- // Last element of the array is the number of used elements.
- intptr_t table_size = symbol_table.Length() - 1;
- intptr_t index = hash % table_size;
- intptr_t num_collisions = 0;
-
- String& symbol = String::Handle();
- symbol ^= symbol_table.At(index);
- while (!symbol.IsNull() && !symbol.Equals(str, begin_index, len)) {
- index = (index + 1) % table_size; // Move to next element.
- symbol ^= symbol_table.At(index);
- num_collisions += 1;
- }
- if (FLAG_dump_symbol_stats) {
- if (num_collisions >= kMaxCollisionBuckets) {
- num_collisions = (kMaxCollisionBuckets - 1);
- }
- collision_count_[num_collisions] += 1;
- }
- return index; // Index of symbol if found or slot into which to add symbol.
-}
-
-
intptr_t Symbols::LookupVMSymbol(RawObject* obj) {
for (intptr_t i = 1; i < Symbols::kMaxPredefinedId; i++) {
if (symbol_handles_[i]->raw() == obj) {
« no previous file with comments | « runtime/vm/symbols.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698