| Index: src/objects.cc
|
| ===================================================================
|
| --- src/objects.cc (revision 10944)
|
| +++ src/objects.cc (working copy)
|
| @@ -6043,9 +6043,11 @@
|
| buffer->Reset(offset, this);
|
| int character_position = offset;
|
| int utf8_bytes = 0;
|
| + int last = unibrow::Utf16::kNoPreviousCharacter;
|
| while (buffer->has_more() && character_position++ < offset + length) {
|
| uint16_t character = buffer->GetNext();
|
| - utf8_bytes += unibrow::Utf8::Length(character);
|
| + utf8_bytes += unibrow::Utf8::Length(character, last);
|
| + last = character;
|
| }
|
|
|
| if (length_return) {
|
| @@ -6059,13 +6061,15 @@
|
| buffer->Seek(offset);
|
| character_position = offset;
|
| int utf8_byte_position = 0;
|
| + last = unibrow::Utf16::kNoPreviousCharacter;
|
| while (buffer->has_more() && character_position++ < offset + length) {
|
| uint16_t character = buffer->GetNext();
|
| if (allow_nulls == DISALLOW_NULLS && character == 0) {
|
| character = ' ';
|
| }
|
| utf8_byte_position +=
|
| - unibrow::Utf8::Encode(result + utf8_byte_position, character);
|
| + unibrow::Utf8::Encode(result + utf8_byte_position, character, last);
|
| + last = character;
|
| }
|
| result[utf8_byte_position] = 0;
|
| return SmartArrayPointer<char>(result);
|
| @@ -6379,73 +6383,6 @@
|
| }
|
|
|
|
|
| -// This method determines the type of string involved and then gets the UTF8
|
| -// length of the string. It doesn't flatten the string and has log(n) recursion
|
| -// for a string of length n.
|
| -int String::Utf8Length(String* input, int from, int to) {
|
| - if (from == to) return 0;
|
| - int total = 0;
|
| - while (true) {
|
| - if (input->IsAsciiRepresentation()) return total + to - from;
|
| - switch (StringShape(input).representation_tag()) {
|
| - case kConsStringTag: {
|
| - ConsString* str = ConsString::cast(input);
|
| - String* first = str->first();
|
| - String* second = str->second();
|
| - int first_length = first->length();
|
| - if (first_length - from < to - first_length) {
|
| - if (first_length > from) {
|
| - // Left hand side is shorter.
|
| - total += Utf8Length(first, from, first_length);
|
| - input = second;
|
| - from = 0;
|
| - to -= first_length;
|
| - } else {
|
| - // We only need the right hand side.
|
| - input = second;
|
| - from -= first_length;
|
| - to -= first_length;
|
| - }
|
| - } else {
|
| - if (first_length <= to) {
|
| - // Right hand side is shorter.
|
| - total += Utf8Length(second, 0, to - first_length);
|
| - input = first;
|
| - to = first_length;
|
| - } else {
|
| - // We only need the left hand side.
|
| - input = first;
|
| - }
|
| - }
|
| - continue;
|
| - }
|
| - case kExternalStringTag:
|
| - case kSeqStringTag: {
|
| - Vector<const uc16> vector = input->GetFlatContent().ToUC16Vector();
|
| - const uc16* p = vector.start();
|
| - for (int i = from; i < to; i++) {
|
| - total += unibrow::Utf8::Length(p[i]);
|
| - }
|
| - return total;
|
| - }
|
| - case kSlicedStringTag: {
|
| - SlicedString* str = SlicedString::cast(input);
|
| - int offset = str->offset();
|
| - input = str->parent();
|
| - from += offset;
|
| - to += offset;
|
| - continue;
|
| - }
|
| - default:
|
| - break;
|
| - }
|
| - UNREACHABLE();
|
| - return 0;
|
| - }
|
| - return 0;
|
| -}
|
| -
|
| -
|
| void Relocatable::PostGarbageCollectionProcessing() {
|
| Isolate* isolate = Isolate::Current();
|
| Relocatable* current = isolate->relocatable_top();
|
| @@ -6839,8 +6776,10 @@
|
| // General slow case check. We know that the ia and ib iterators
|
| // have the same length.
|
| while (ia->has_more()) {
|
| - uc32 ca = ia->GetNext();
|
| - uc32 cb = ib->GetNext();
|
| + uint32_t ca = ia->GetNext();
|
| + uint32_t cb = ib->GetNext();
|
| + ASSERT(ca <= unibrow::Utf16::kMaxNonSurrogateCharCode);
|
| + ASSERT(cb <= unibrow::Utf16::kMaxNonSurrogateCharCode);
|
| if (ca != cb)
|
| return false;
|
| }
|
| @@ -7023,8 +6962,14 @@
|
| decoder->Reset(str.start(), str.length());
|
| int i;
|
| for (i = 0; i < slen && decoder->has_more(); i++) {
|
| - uc32 r = decoder->GetNext();
|
| - if (Get(i) != r) return false;
|
| + uint32_t r = decoder->GetNext();
|
| + if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {  // Needs lead + trail.
|
| + if (i >= slen - 1) return false;  // Trail would be out of range.
|
| + if (Get(i++) != unibrow::Utf16::LeadSurrogate(r)) return false;
|
| + if (Get(i) != unibrow::Utf16::TrailSurrogate(r)) return false;
|
| + } else {
|
| + if (Get(i) != r) return false;
|
| + }
|
| }
|
| return i == slen && !decoder->has_more();
|
| }
|
| @@ -7154,6 +7099,22 @@
|
| }
|
|
|
|
|
| +// Feeds |c| to the hash as two UTF-16 code units: the lead surrogate of |c|
|
| +// followed by its trail surrogate.  |c| is not range-checked here.
|
| +void StringHasher::AddSurrogatePair(uc32 c) {
|
| +  AddCharacter(static_cast<uint16_t>(unibrow::Utf16::LeadSurrogate(c)));
|
| +  AddCharacter(static_cast<uint16_t>(unibrow::Utf16::TrailSurrogate(c)));
|
| +}
|
| +
|
| +
|
| +// Splits |c| into its UTF-16 lead and trail surrogates and passes them, in
|
| +// that order, to AddCharacterNoIndex().  |c| is not range-checked here.
|
| +void StringHasher::AddSurrogatePairNoIndex(uc32 c) {
|
| +  AddCharacterNoIndex(static_cast<uint16_t>(unibrow::Utf16::LeadSurrogate(c)));
|
| +  AddCharacterNoIndex(static_cast<uint16_t>(unibrow::Utf16::TrailSurrogate(c)));
|
| +}
|
| +
|
| +
|
| uint32_t StringHasher::GetHashField() {
|
| ASSERT(is_valid());
|
| if (length_ <= String::kMaxHashCalcLength) {
|
| @@ -10746,7 +10707,7 @@
|
| if (hash_field_ != 0) return hash_field_ >> String::kHashShift;
|
| unibrow::Utf8InputBuffer<> buffer(string_.start(),
|
| static_cast<unsigned>(string_.length()));
|
| - chars_ = buffer.Length();
|
| + chars_ = buffer.Utf16Length();
|
| hash_field_ = String::ComputeHashField(&buffer, chars_, seed_);
|
| uint32_t result = hash_field_ >> String::kHashShift;
|
| ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.
|
|
|