Chromium Code Reviews | Index: runtime/vm/object.cc |
| diff --git a/runtime/vm/object.cc b/runtime/vm/object.cc |
| index 8e1ad649c9101ed396520087d9e4d5edd0e3d118..0758bcae27db294f864701b9bacaaedff7a00119 100644 |
| --- a/runtime/vm/object.cc |
| +++ b/runtime/vm/object.cc |
| @@ -141,16 +141,16 @@ static RawString* IdentifierPrettyName(const String& name) { |
| bool is_setter = false; |
| for (int i = 0; i < name.Length(); i++) { |
| - if (name.CharAt(i) == ':') { |
| + if (name.CodeUnitAt(i) == ':') { |
|
cshapiro
2012/11/15 20:14:51
This file is an example of why we would like the r… [comment preview truncated on the review page]
erikcorry
2012/11/15 23:47:05
Done.
|
| ASSERT(start == 0); |
| - if (name.CharAt(0) == 's') { |
| + if (name.CodeUnitAt(0) == 's') { |
| is_setter = true; |
| } |
| start = i + 1; |
| - } else if (name.CharAt(i) == '@') { |
| + } else if (name.CodeUnitAt(i) == '@') { |
| ASSERT(at_pos == len); |
| at_pos = i; |
| - } else if (name.CharAt(i) == '.') { |
| + } else if (name.CodeUnitAt(i) == '.') { |
| dot_pos = i; |
| break; |
| } |
| @@ -168,7 +168,7 @@ static RawString* IdentifierPrettyName(const String& name) { |
| // "_ReceivePortImpl@6be832b._internal@6be832b". |
| at_pos = len; |
| for (int i = dot_pos; i < name.Length(); i++) { |
| - if (name.CharAt(i) == '@') { |
| + if (name.CodeUnitAt(i) == '@') { |
| ASSERT(at_pos == len); |
| at_pos = i; |
| } |
| @@ -489,7 +489,7 @@ void Object::RegisterClass(const Class& cls, |
| const String& name, |
| const Library& lib) { |
| ASSERT(name.Length() > 0); |
| - ASSERT(name.CharAt(0) != '_'); |
| + ASSERT(name.CodeUnitAt(0) != '_'); |
| cls.set_name(name); |
| lib.AddClass(cls); |
| } |
| @@ -499,7 +499,7 @@ void Object::RegisterPrivateClass(const Class& cls, |
| const String& public_class_name, |
| const Library& lib) { |
| ASSERT(public_class_name.Length() > 0); |
| - ASSERT(public_class_name.CharAt(0) == '_'); |
| + ASSERT(public_class_name.CodeUnitAt(0) == '_'); |
| String& str = String::Handle(); |
| str = lib.PrivateName(public_class_name); |
| cls.set_name(str); |
| @@ -2187,12 +2187,12 @@ static bool MatchesAccessorName(const String& name, |
| return false; |
| } |
| for (intptr_t i = 0; i < prefix_length; i++) { |
| - if (name.CharAt(i) != prefix[i]) { |
| + if (name.CodeUnitAt(i) != static_cast<uint32_t>(prefix[i])) { |
| return false; |
| } |
| } |
| for (intptr_t i = 0, j = prefix_length; i < accessor_name_len; i++, j++) { |
| - if (name.CharAt(j) != accessor_name.CharAt(i)) { |
| + if (name.CodeUnitAt(j) != accessor_name.CodeUnitAt(i)) { |
| return false; |
| } |
| } |
| @@ -4497,13 +4497,13 @@ RawString* TokenStream::GenerateSource() const { |
| bool is_raw_string = false; |
| bool escape_characters = false; |
| for (intptr_t i = 0; i < literal.Length(); i++) { |
| - if (IsSpecialCharacter(literal.CharAt(i))) { |
| + if (IsSpecialCharacter(literal.CodeUnitAt(i))) { |
| escape_characters = true; |
| } |
| // TODO(4995): Temp solution for raw strings, this will break |
| // if we saw a string that is not a raw string but has back slashes |
| // in it. |
| - if ((literal.CharAt(i) == '\\')) { |
| + if ((literal.CodeUnitAt(i) == '\\')) { |
| if ((next != Token::kINTERPOL_VAR) && |
| (next != Token::kINTERPOL_START) && |
| (prev != Token::kINTERPOL_VAR) && |
| @@ -4531,12 +4531,12 @@ RawString* TokenStream::GenerateSource() const { |
| } |
| } else if (curr == Token::kINTERPOL_VAR) { |
| literals.Add(dollar); |
| - if (literal.CharAt(0) == Scanner::kPrivateIdentifierStart) { |
| + if (literal.CodeUnitAt(0) == Scanner::kPrivateIdentifierStart) { |
| literal = String::SubString(literal, 0, literal.Length() - private_len); |
| } |
| literals.Add(literal); |
| } else if (curr == Token::kIDENT) { |
| - if (literal.CharAt(0) == Scanner::kPrivateIdentifierStart) { |
| + if (literal.CodeUnitAt(0) == Scanner::kPrivateIdentifierStart) { |
| literal = String::SubString(literal, 0, literal.Length() - private_len); |
| } |
| literals.Add(literal); |
| @@ -5080,10 +5080,10 @@ RawString* Script::GetLine(intptr_t line_number) const { |
| if ((current_line == line_number) && (line_start < 0)) { |
| line_start = ix; |
| } |
| - if (src.CharAt(ix) == '\n') { |
| + if (src.CodeUnitAt(ix) == '\n') { |
| current_line++; |
| - } else if (src.CharAt(ix) == '\r') { |
| - if ((ix + 1 != src.Length()) && (src.CharAt(ix + 1) != '\n')) { |
| + } else if (src.CodeUnitAt(ix) == '\r') { |
| + if ((ix + 1 != src.Length()) && (src.CodeUnitAt(ix + 1) != '\n')) { |
| current_line++; |
| } |
| } else { |
| @@ -5110,7 +5110,7 @@ RawString* Script::GetSnippet(intptr_t from_line, |
| intptr_t lookahead = 0; |
| intptr_t snippet_start = -1; |
| intptr_t snippet_end = -1; |
| - char c = src.CharAt(lookahead); |
| + char c = src.CodeUnitAt(lookahead); |
| while (lookahead != length) { |
| if (snippet_start == -1) { |
| if ((line == from_line) && (column == from_column)) { |
| @@ -5128,13 +5128,13 @@ RawString* Script::GetSnippet(intptr_t from_line, |
| lookahead++; |
| if (lookahead != length) { |
| // Replace '\r' with '\n' and a sequence of '\r' '\n' with a single '\n'. |
| - if (src.CharAt(lookahead) == '\r') { |
| + if (src.CodeUnitAt(lookahead) == '\r') { |
| c = '\n'; |
| - if (lookahead + 1 != length && src.CharAt(lookahead) == '\n') { |
| + if (lookahead + 1 != length && src.CodeUnitAt(lookahead) == '\n') { |
| lookahead++; |
| } |
| } else { |
| - c = src.CharAt(lookahead); |
| + c = src.CodeUnitAt(lookahead); |
| } |
| } |
| } |
| @@ -5534,13 +5534,13 @@ RawObject* Library::LookupLocalObject(const String& name) const { |
| static bool ShouldBePrivate(const String& name) { |
| return |
| (name.Length() >= 1 && |
| - name.CharAt(0) == '_') || |
| + name.CodeUnitAt(0) == '_') || |
| (name.Length() >= 5 && |
| - (name.CharAt(4) == '_' && |
| - (name.CharAt(0) == 'g' || name.CharAt(0) == 's') && |
| - name.CharAt(1) == 'e' && |
| - name.CharAt(2) == 't' && |
| - name.CharAt(3) == ':')); |
| + (name.CodeUnitAt(4) == '_' && |
| + (name.CodeUnitAt(0) == 'g' || name.CodeUnitAt(0) == 's') && |
| + name.CodeUnitAt(1) == 'e' && |
| + name.CodeUnitAt(2) == 't' && |
| + name.CodeUnitAt(3) == ':')); |
| } |
| @@ -9855,7 +9855,7 @@ intptr_t String::Hash(const String& str, intptr_t begin_index, intptr_t len) { |
| ASSERT((begin_index + len) <= str.Length()); |
| StringHasher hasher; |
| for (intptr_t i = 0; i < len; i++) { |
| - hasher.Add(str.CharAt(begin_index + i)); |
| + hasher.Add(str.CodeUnitAt(begin_index + i)); |
| } |
| return hasher.Finalize(String::kHashBits); |
| } |
| @@ -9887,21 +9887,21 @@ intptr_t String::Hash(const uint32_t* characters, intptr_t len) { |
| } |
| -int32_t String::CharAt(intptr_t index) const { |
| +uint32_t String::CodeUnitAt(intptr_t index) const { |
| intptr_t class_id = raw()->GetClassId(); |
| ASSERT(RawObject::IsStringClassId(class_id)); |
| NoGCScope no_gc; |
| if (class_id == kOneByteStringCid) { |
| - return *OneByteString::CharAddr(*this, index); |
| + return OneByteString::CodeUnitAt(*this, index); |
| } |
| if (class_id == kTwoByteStringCid) { |
| - return *TwoByteString::CharAddr(*this, index); |
| + return TwoByteString::CodeUnitAt(*this, index); |
| } |
| if (class_id == kExternalOneByteStringCid) { |
| - return *ExternalOneByteString::CharAddr(*this, index); |
| + return ExternalOneByteString::CodeUnitAt(*this, index); |
| } |
| ASSERT(class_id == kExternalTwoByteStringCid); |
| - return *ExternalTwoByteString::CharAddr(*this, index); |
| + return ExternalTwoByteString::CodeUnitAt(*this, index); |
| } |
| @@ -9953,12 +9953,17 @@ bool String::Equals(const char* str) const { |
| // Lengths don't match. |
| return false; |
| } |
| - int32_t ch; |
| + uint32_t ch; |
| intptr_t consumed = Utf8::Decode(reinterpret_cast<const uint8_t*>(str), |
| len, |
| &ch); |
| - if (consumed == 0 || this->CharAt(i) != ch) { |
| - return false; |
| + if (consumed == 0) return false; |
| + |
| + if (ch <= Utf16::kMaxCodeUnit) { |
| + if (this->CodeUnitAt(i) != ch) return false; |
| + } else { |
| + if (Utf16::CodePointAt(*this, i) != ch) return false; |
| + i++; |
| } |
| str += consumed; |
| len -= consumed; |
| @@ -9974,7 +9979,7 @@ bool String::Equals(const uint8_t* characters, intptr_t len) const { |
| } |
| for (intptr_t i = 0; i < len; i++) { |
| - if (this->CharAt(i) != characters[i]) { |
| + if (this->CodeUnitAt(i) != characters[i]) { |
| return false; |
| } |
| } |
| @@ -9989,7 +9994,7 @@ bool String::Equals(const uint16_t* characters, intptr_t len) const { |
| } |
| for (intptr_t i = 0; i < len; i++) { |
| - if (this->CharAt(i) != characters[i]) { |
| + if (this->CodeUnitAt(i) != characters[i]) { |
| return false; |
| } |
| } |
| @@ -10004,9 +10009,11 @@ bool String::Equals(const uint32_t* characters, intptr_t len) const { |
| } |
| for (intptr_t i = 0; i < len; i++) { |
| - if (this->CharAt(i) != static_cast<int32_t>(characters[i])) { |
| + uint32_t c = this->CodeUnitAt(i); |
| + if (c != characters[i]) { |
| return false; |
| } |
| + if (c > Utf16::kMaxCodeUnit) i++; |
| } |
| return true; |
| } |
| @@ -10016,13 +10023,15 @@ intptr_t String::CompareTo(const String& other) const { |
| const intptr_t this_len = this->Length(); |
| const intptr_t other_len = other.IsNull() ? 0 : other.Length(); |
| const intptr_t len = (this_len < other_len) ? this_len : other_len; |
| + // UTF-16 has the high surrogate before the low surrogate so we can compare |
| + // one code unit at a time for efficiency and still get the right ordering. |
| for (intptr_t i = 0; i < len; i++) { |
| - int32_t this_code_point = this->CharAt(i); |
| - int32_t other_code_point = other.CharAt(i); |
| - if (this_code_point < other_code_point) { |
| + int32_t this_code_unit = this->CodeUnitAt(i); |
| + int32_t other_code_unit = other.CodeUnitAt(i); |
| + if (this_code_unit < other_code_unit) { |
| return -1; |
| } |
| - if (this_code_point > other_code_point) { |
| + if (this_code_unit > other_code_unit) { |
| return 1; |
| } |
| } |
| @@ -10038,7 +10047,7 @@ bool String::StartsWith(const String& other) const { |
| } |
| intptr_t slen = other.Length(); |
| for (int i = 0; i < slen; i++) { |
| - if (this->CharAt(i) != other.CharAt(i)) { |
| + if (this->CodeUnitAt(i) != other.CodeUnitAt(i)) { |
| return false; |
| } |
| } |
| @@ -10066,7 +10075,7 @@ RawString* String::New(const uint8_t* utf8_array, |
| intptr_t array_len, |
| Heap::Space space) { |
| Utf8::Type type; |
| - intptr_t len = Utf8::CodePointCount(utf8_array, array_len, &type); |
| + intptr_t len = Utf8::CodeUnitCount(utf8_array, array_len, &type); |
| if (type == Utf8::kAscii) { |
| const String& strobj = String::Handle(OneByteString::New(len, space)); |
| if (len > 0) { |
| @@ -10344,7 +10353,7 @@ RawString* String::SubString(const String& str, |
| intptr_t char_size = str.CharSize(); |
| if (char_size == kTwoByteChar) { |
| for (intptr_t i = begin_index; i < begin_index + length; ++i) { |
| - if (str.CharAt(i) > 0x7F) { |
| + if (str.CodeUnitAt(i) > 0x7F) { |
| is_one_byte_string = false; |
| break; |
| } |
| @@ -10389,25 +10398,39 @@ RawString* String::Transform(int32_t (*mapping)(int32_t ch), |
| Heap::Space space) { |
| ASSERT(!str.IsNull()); |
| bool has_mapping = false; |
| - int32_t dst_max = 0; |
| + uint32_t dst_max = 0; |
| intptr_t len = str.Length(); |
| + intptr_t out_len = 0; |
| // TODO(cshapiro): assume a transform is required, rollback if not. |
| - for (intptr_t i = 0; i < len; ++i) { |
| - int32_t src = str.CharAt(i); |
| - int32_t dst = mapping(src); |
| + intptr_t i = 0; |
| + for (; i < len; ++i) { |
| + uint32_t src = str.CodeUnitAt(i); |
| + if (Utf16::IsSurrogate(src)) break; |
| + uint32_t dst = mapping(src); |
| + if (src != dst) { |
| + has_mapping = true; |
| + } |
| + dst_max = Utils::Maximum(dst_max, dst); |
| + out_len += dst > Utf16::kMaxCodeUnit ? 2 : 1; |
| + } |
| + for (; i < len; ++i) { |
| + uint32_t src = Utf16::CodePointAt(str, i); |
| + uint32_t dst = mapping(src); |
| if (src != dst) { |
| has_mapping = true; |
| } |
| dst_max = Utils::Maximum(dst_max, dst); |
| + out_len += dst > Utf16::kMaxCodeUnit ? 2 : 1; |
| + if (src > Utf16::kMaxCodeUnit) ++i; |
| } |
| if (!has_mapping) { |
| return str.raw(); |
| } |
| if (dst_max <= 0x7F) { |
| - return OneByteString::Transform(mapping, str, space); |
| + return OneByteString::Transform(mapping, str, out_len, space); |
| } |
| ASSERT(dst_max > 0x7F); |
| - return TwoByteString::Transform(mapping, str, space); |
| + return TwoByteString::Transform(mapping, str, out_len, space); |
| } |
| @@ -10614,14 +10637,18 @@ RawOneByteString* OneByteString::ConcatAll(const Array& strings, |
| RawOneByteString* OneByteString::Transform(int32_t (*mapping)(int32_t ch), |
| const String& str, |
| + int out_length, |
| Heap::Space space) { |
| ASSERT(!str.IsNull()); |
| intptr_t len = str.Length(); |
| - const String& result = String::Handle(OneByteString::New(len, space)); |
| - for (intptr_t i = 0; i < len; ++i) { |
| - int32_t ch = mapping(str.CharAt(i)); |
| - ASSERT(ch >= 0 && ch <= 0x7F); |
| - *CharAddr(result, i) = ch; |
| + const String& result = |
| + String::Handle(OneByteString::New(out_length, space)); |
| + for (intptr_t i = 0, j = 0; i < len; ++i, j++) { |
| + uint32_t old_ch = str.CodeUnitAt(i); |
| + if (old_ch > Utf16::kMaxCodeUnit) i++; |
| + uint32_t ch = mapping(old_ch); |
| + ASSERT(ch <= 0x7Fu); |
| + *CharAddr(result, j) = ch; |
| } |
| return OneByteString::raw(result); |
| } |
| @@ -10705,9 +10732,11 @@ RawTwoByteString* TwoByteString::New(intptr_t utf16_len, |
| NoGCScope no_gc; |
| intptr_t j = 0; |
| for (intptr_t i = 0; i < array_len; ++i) { |
| - if (utf32_array[i] > 0xffff) { |
| + uint32_t code_point = utf32_array[i]; |
| + if (code_point > Utf16::kMaxCodeUnit) { |
| ASSERT(j < (utf16_len - 1)); |
| - Utf8::ConvertUTF32ToUTF16(utf32_array[i], CharAddr(result, j)); |
| + *CharAddr(result, j) = Utf16::LeadFromCodePoint(code_point); |
| + *CharAddr(result, j + 1) = Utf16::TrailFromCodePoint(code_point); |
| j += 2; |
| } else { |
| ASSERT(j < utf16_len); |
| @@ -10761,14 +10790,25 @@ RawTwoByteString* TwoByteString::ConcatAll(const Array& strings, |
| RawTwoByteString* TwoByteString::Transform(int32_t (*mapping)(int32_t ch), |
| const String& str, |
| + int out_length, |
| Heap::Space space) { |
| ASSERT(!str.IsNull()); |
| intptr_t len = str.Length(); |
| - const String& result = String::Handle(TwoByteString::New(len, space)); |
| - for (intptr_t i = 0; i < len; ++i) { |
| - int32_t ch = mapping(str.CharAt(i)); |
| - ASSERT(ch >= 0 && ch <= 0xFFFF); |
| - *CharAddr(result, i) = ch; |
| + const String& result = |
| + String::Handle(TwoByteString::New(out_length, space)); |
| + for (intptr_t i = 0, j = 0; i < len; ++i, j++) { |
| + uint32_t old_ch = Utf16::CodePointAt(str, i); |
| + if (old_ch > Utf16::kMaxCodeUnit) i++; |
| + uint32_t ch = mapping(old_ch); |
| + ASSERT(ch <= Utf16::kMaxCodePoint); |
| + if (ch <= Utf16::kMaxCodeUnit) { |
| + *CharAddr(result, j) = ch; |
| + } else { |
| + *CharAddr(result, j) = Utf16::LeadFromCodePoint(ch); |
| + *CharAddr(result, j + 1) = Utf16::TrailFromCodePoint(ch); |
| + ++j; |
| + } |
| + ASSERT(j <= out_length); |
| } |
| return TwoByteString::raw(result); |
| } |