src/objects.cc - Issue 11593007: Replace the use CharacterStreams in Heap::AllocateSymbolInternal and String::ComputeHash

Unified Diff: src/objects.cc

Issue 11593007: Replace the use CharacterStreams in Heap::AllocateSymbolInternal and String::ComputeHash (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/objects.cc

diff --git a/src/objects.cc b/src/objects.cc

index 1d95a5c15271ab3d955e2cbd3f2b6ad1b270cc25..864a4e99a928899e6cd07b51ef8e26adcfb31cd5 100644

--- a/src/objects.cc

+++ b/src/objects.cc

@@ -7021,24 +7021,36 @@ void StringInputBuffer::Seek(unsigned pos) {

}

-String* ConsStringIteratorOp::Operate(ConsString* cons_string,

- unsigned* offset_out, int32_t* type_out, unsigned* length_out) {

- ASSERT(*length_out == (unsigned)cons_string->length());

- ASSERT(depth_ == 0);

- // Push the root string.

- PushLeft(cons_string);

+String* ConsStringIteratorOp::Operate(String* string,

+ unsigned* offset_out,

+ int32_t* type_out,

+ unsigned* length_out) {

+ ASSERT(string->IsConsString());

+ ConsString* cons_string = ConsString::cast(string);

+ // Set up search data.

root_ = cons_string;

- root_type_ = *type_out;

- root_length_ = *length_out;

consumed_ = *offset_out;

- unsigned targetOffset = *offset_out;

+ // Now search.

+ return Search(offset_out, type_out, length_out);

+String* ConsStringIteratorOp::Search(unsigned* offset_out,

+ int32_t* type_out,

+ unsigned* length_out) {

+ ConsString* cons_string = root_;

+ // Reset the stack, pushing the root string.

+ depth_ = 1;

+ maximum_depth_ = 1;

+ frames_[0] = cons_string;

+ const unsigned consumed = consumed_;

unsigned offset = 0;

while (true) {

// Loop until the string is found which contains the target offset.

String* string = cons_string->first();

unsigned length = string->length();

int32_t type;

- if (targetOffset < offset + length) {

+ if (consumed < offset + length) {

// Target offset is in the left branch.

// Keep going if we're still in a ConString.

type = string->map()->instance_type();

@@ -7067,6 +7079,7 @@ String* ConsStringIteratorOp::Operate(ConsString* cons_string,

// Account for the possibility of an empty right leaf.

// This happens only if we have asked for an offset outside the string.

if (length == 0) {

+ // Reset depth so future operations will return null immediately.

Reset();

return NULL;

}

@@ -7077,9 +7090,8 @@ String* ConsStringIteratorOp::Operate(ConsString* cons_string,

}

ASSERT(length != 0);

// Adjust return values and exit.

- unsigned innerOffset = targetOffset - offset;

- consumed_ += length - innerOffset;

- *offset_out = innerOffset;

+ consumed_ = offset + length;

+ *offset_out = consumed - offset;

*type_out = type;

*length_out = length;

return string;

@@ -7089,8 +7101,9 @@ String* ConsStringIteratorOp::Operate(ConsString* cons_string,

}

-String* ConsStringIteratorOp::NextLeaf(

- bool* blew_stack, int32_t* type_out, unsigned* length_out) {

+String* ConsStringIteratorOp::NextLeaf(bool* blew_stack,

+ int32_t* type_out,

+ unsigned* length_out) {

while (true) {

// Tree traversal complete.

if (depth_ == 0) {

@@ -7114,6 +7127,7 @@ String* ConsStringIteratorOp::NextLeaf(

if (length == 0) continue;

*length_out = length;

*type_out = type;

+ consumed_ += length;

return string;

}

cons_string = ConsString::cast(string);

@@ -7126,8 +7140,11 @@ String* ConsStringIteratorOp::NextLeaf(

type = string->map()->instance_type();

if ((type & kStringRepresentationMask) != kConsStringTag) {

AdjustMaximumDepth();

+ unsigned length = (unsigned) string->length();

+ ASSERT(length != 0);

+ *length_out = length;

*type_out = type;

- *length_out = string->length();

+ consumed_ += length;

return string;

}

cons_string = ConsString::cast(string);

@@ -7666,28 +7683,62 @@ bool String::IsTwoByteEqualTo(Vector<const uc16> str) {

}

+class IteratingStringHasher: public StringHasher {

+ public:

+ static inline uint32_t Hash(String* string, uint32_t seed) {

+ const unsigned len = static_cast<unsigned>(string->length());

+ IteratingStringHasher hasher(len, seed);

+ if (hasher.has_trivial_hash()) {

+ return hasher.GetHashField();

+ }

+ int32_t type = string->map()->instance_type();

+ ConsStringNullOp null_op;

+ String::Visit(string, 0, hasher, null_op, type, len);

+ // Flat strings terminate immediately.

+ if (hasher.consumed_ == len) {

+ ASSERT(!string->IsConsString());

+ return hasher.GetHashField();

+ }

+ ASSERT(string->IsConsString());

+ // This is a ConsString, iterate across it.

+ ConsStringIteratorOp op;

+ unsigned offset = 0;

+ unsigned leaf_length = len;

+ string = op.Operate(string, &offset, &type, &leaf_length);

+ while (true) {

+ ASSERT(hasher.consumed_ < len);

+ String::Visit(string, 0, hasher, null_op, type, leaf_length);

+ if (hasher.consumed_ == len) break;

+ string = op.ContinueOperation(&type, &leaf_length);

+ // This should be taken care of by the length check.

+ ASSERT(string != NULL);

+ }

+ return hasher.GetHashField();

+ }

+ inline void VisitOneByteString(const uint8_t* chars, unsigned length) {

+ AddCharacters(chars, static_cast<int>(length));

+ consumed_ += length;

+ }

+ inline void VisitTwoByteString(const uint16_t* chars, unsigned length) {

+ AddCharacters(chars, static_cast<int>(length));

+ consumed_ += length;

+ }

+ private:

+ inline IteratingStringHasher(int len, uint32_t seed)

+ : StringHasher(len, seed),

+ consumed_(0) {}

+ unsigned consumed_;

+ DISALLOW_COPY_AND_ASSIGN(IteratingStringHasher);

+};

uint32_t String::ComputeAndSetHash() {

// Should only be called if hash code has not yet been computed.

ASSERT(!HasHashCode());

- const int len = length();

- // Compute the hash code.

- uint32_t field = 0;

- if (StringShape(this).IsSequentialAscii()) {

- field = HashSequentialString(SeqOneByteString::cast(this)->GetChars(),

- len,

- GetHeap()->HashSeed());

- } else if (StringShape(this).IsSequentialTwoByte()) {

- field = HashSequentialString(SeqTwoByteString::cast(this)->GetChars(),

- len,

- GetHeap()->HashSeed());

- } else {

- StringInputBuffer buffer(this);

- field = ComputeHashField(&buffer, len, GetHeap()->HashSeed());

- }

// Store the hash code in the object.

+ uint32_t field = IteratingStringHasher::Hash(this, GetHeap()->HashSeed());

set_hash_field(field);

// Check the hash code is there.

@@ -7791,57 +7842,59 @@ uint32_t StringHasher::MakeArrayIndexHash(uint32_t value, int length) {

}

-void StringHasher::AddSurrogatePair(uc32 c) {

- uint16_t lead = unibrow::Utf16::LeadSurrogate(c);

- AddCharacter(lead);

- uint16_t trail = unibrow::Utf16::TrailSurrogate(c);

- AddCharacter(trail);

-void StringHasher::AddSurrogatePairNoIndex(uc32 c) {

- uint16_t lead = unibrow::Utf16::LeadSurrogate(c);

- AddCharacterNoIndex(lead);

- uint16_t trail = unibrow::Utf16::TrailSurrogate(c);

- AddCharacterNoIndex(trail);

uint32_t StringHasher::GetHashField() {

if (length_ <= String::kMaxHashCalcLength) {

- if (is_array_index()) {

- return MakeArrayIndexHash(array_index(), length_);

+ if (is_array_index_) {

+ return MakeArrayIndexHash(array_index_, length_);

}

- return (GetHash() << String::kHashShift) | String::kIsNotArrayIndexMask;

+ return (GetHashCore(raw_running_hash_) << String::kHashShift) |

+ String::kIsNotArrayIndexMask;

} else {

return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask;

}

-uint32_t String::ComputeHashField(unibrow::CharacterStream* buffer,

- int length,

- uint32_t seed) {

+uint32_t StringHasher::ComputeHashField(unibrow::CharacterStream* buffer,

+ int length,

+ uint32_t seed) {

+ typedef unibrow::Utf16 u;

StringHasher hasher(length, seed);

// Very long strings have a trivial hash that doesn't inspect the

// string contents.

if (hasher.has_trivial_hash()) {

return hasher.GetHashField();

}

// Do the iterative array index computation as long as there is a

// chance this is an array index.

- while (buffer->has_more() && hasher.is_array_index()) {

- hasher.AddCharacter(buffer->GetNext());

+ if (hasher.is_array_index_) {

+ while (buffer->has_more()) {

+ uint32_t c = buffer->GetNext();

+ if (c > u::kMaxNonSurrogateCharCode) {

+ uint16_t c1 = u::LeadSurrogate(c);

+ uint16_t c2 = u::TrailSurrogate(c);

+ hasher.AddCharacter(c1);

+ hasher.AddCharacter(c2);

+ if (!hasher.UpdateIndex(c1)) break;

+ if (!hasher.UpdateIndex(c2)) break;

+ } else {

+ hasher.AddCharacter(c);

+ if (!hasher.UpdateIndex(c)) break;

+ }

}

// Process the remaining characters without updating the array

// index.

while (buffer->has_more()) {

- hasher.AddCharacterNoIndex(buffer->GetNext());

+ ASSERT(!hasher.is_array_index_);

+ uint32_t c = buffer->GetNext();

+ if (c > u::kMaxNonSurrogateCharCode) {

+ hasher.AddCharacter(u::LeadSurrogate(c));

+ hasher.AddCharacter(u::TrailSurrogate(c));

+ } else {

+ hasher.AddCharacter(c);

+ }

}

return hasher.GetHashField();

}

@@ -11652,7 +11705,7 @@ class Utf8SymbolKey : public HashTableKey {

unibrow::Utf8InputBuffer<> buffer(string_.start(),

static_cast<unsigned>(string_.length()));

chars_ = buffer.Utf16Length();

- hash_field_ = String::ComputeHashField(&buffer, chars_, seed_);

+ hash_field_ = StringHasher::ComputeHashField(&buffer, chars_, seed_);

uint32_t result = hash_field_ >> String::kHashShift;

ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.

return result;

@@ -11682,29 +11735,9 @@ class SequentialSymbolKey : public HashTableKey {

: string_(string), hash_field_(0), seed_(seed) { }

uint32_t Hash() {

- StringHasher hasher(string_.length(), seed_);

- // Very long strings have a trivial hash that doesn't inspect the

- // string contents.

- if (hasher.has_trivial_hash()) {

- hash_field_ = hasher.GetHashField();

- } else {

- int i = 0;

- // Do the iterative array index computation as long as there is a

- // chance this is an array index.

- while (i < string_.length() && hasher.is_array_index()) {

- hasher.AddCharacter(static_cast<uc32>(string_[i]));

- i++;

- }

- // Process the remaining characters without updating the array

- // index.

- while (i < string_.length()) {

- hasher.AddCharacterNoIndex(static_cast<uc32>(string_[i]));

- i++;

- }

- hash_field_ = hasher.GetHashField();

- }

+ hash_field_ = StringHasher::HashSequentialString<Char>(string_.start(),

+ string_.length(),

+ seed_);

uint32_t result = hash_field_ >> String::kHashShift;

ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.

@@ -11749,32 +11782,9 @@ class SubStringAsciiSymbolKey : public HashTableKey {

uint32_t Hash() {

ASSERT(length_ >= 0);

ASSERT(from_ + length_ <= string_->length());

- StringHasher hasher(length_, string_->GetHeap()->HashSeed());

- // Very long strings have a trivial hash that doesn't inspect the

- // string contents.

- if (hasher.has_trivial_hash()) {

- hash_field_ = hasher.GetHashField();

- } else {

- int i = 0;

- // Do the iterative array index computation as long as there is a

- // chance this is an array index.

- while (i < length_ && hasher.is_array_index()) {

- hasher.AddCharacter(static_cast<uc32>(

- string_->SeqOneByteStringGet(i + from_)));

- i++;

- }

- // Process the remaining characters without updating the array

- // index.

- while (i < length_) {

- hasher.AddCharacterNoIndex(static_cast<uc32>(

- string_->SeqOneByteStringGet(i + from_)));

- i++;

- }

- hash_field_ = hasher.GetHashField();

- }

+ char* chars = string_->GetChars() + from_;

+ hash_field_ = StringHasher::HashSequentialString(

+ chars, length_, string_->GetHeap()->HashSeed());

uint32_t result = hash_field_ >> String::kHashShift;

ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.

return result;

@@ -11849,8 +11859,7 @@ class SymbolKey : public HashTableKey {

return string_;

}

// Otherwise allocate a new symbol.

- StringInputBuffer buffer(string_);

- return heap->AllocateInternalSymbol(&buffer,

+ return heap->AllocateInternalSymbol(string_,

string_->length(),

string_->hash_field());

}

@@ -12620,7 +12629,7 @@ MaybeObject* SymbolTable::LookupString(String* string, Object** s) {

// algorithm.

class TwoCharHashTableKey : public HashTableKey {

public:

- TwoCharHashTableKey(uint32_t c1, uint32_t c2, uint32_t seed)

+ TwoCharHashTableKey(uint16_t c1, uint16_t c2, uint32_t seed)

: c1_(c1), c2_(c2) {

// Char 1.

uint32_t hash = seed;

@@ -12636,17 +12645,17 @@ class TwoCharHashTableKey : public HashTableKey {

hash ^= hash >> 11;

hash += hash << 15;

if ((hash & String::kHashBitMask) == 0) hash = StringHasher::kZeroHash;

+ hash_ = hash;

#ifdef DEBUG

- StringHasher hasher(2, seed);

- hasher.AddCharacter(c1);

- hasher.AddCharacter(c2);

// If this assert fails then we failed to reproduce the two-character

// version of the string hashing algorithm above. One reason could be

// that we were passed two digits as characters, since the hash

// algorithm is different in that case.

- ASSERT_EQ(static_cast<int>(hasher.GetHash()), static_cast<int>(hash));

+ uint16_t chars[2] = {c1, c2};

+ uint32_t check_hash = StringHasher::HashSequentialString(chars, 2, seed);

+ hash = (hash << String::kHashShift) | String::kIsNotArrayIndexMask;

+ ASSERT_EQ(static_cast<int32_t>(hash), static_cast<int32_t>(check_hash));

#endif

- hash_ = hash;

}

bool IsMatch(Object* o) {

@@ -12671,8 +12680,8 @@ class TwoCharHashTableKey : public HashTableKey {

}

private:

- uint32_t c1_;

- uint32_t c2_;

+ uint16_t c1_;

+ uint16_t c2_;

uint32_t hash_;

};

@@ -12691,8 +12700,8 @@ bool SymbolTable::LookupSymbolIfExists(String* string, String** symbol) {

}

-bool SymbolTable::LookupTwoCharsSymbolIfExists(uint32_t c1,

- uint32_t c2,

+bool SymbolTable::LookupTwoCharsSymbolIfExists(uint16_t c1,

+ uint16_t c2,

String** symbol) {

TwoCharHashTableKey key(c1, c2, GetHeap()->HashSeed());

int entry = FindEntry(&key);

« src/heap-inl.h ('K') | « src/objects.h ('k') | src/objects-inl.h » ('j') | no next file with comments »