| Index: runtime/vm/object.cc
|
| diff --git a/runtime/vm/object.cc b/runtime/vm/object.cc
|
| index 92f5e2816d771a2307b675257425e2a15c40a24f..ee4459e558363c18bf6f1d40e746c7332cd2959a 100644
|
| --- a/runtime/vm/object.cc
|
| +++ b/runtime/vm/object.cc
|
| @@ -141,16 +141,16 @@ static RawString* IdentifierPrettyName(const String& name) {
|
| bool is_setter = false;
|
|
|
| for (int i = 0; i < name.Length(); i++) {
|
| - if (name.CharAt(i) == ':') {
|
| + if (name.CodeUnitAt(i) == ':') {
|
| ASSERT(start == 0);
|
| - if (name.CharAt(0) == 's') {
|
| + if (name.CodeUnitAt(0) == 's') {
|
| is_setter = true;
|
| }
|
| start = i + 1;
|
| - } else if (name.CharAt(i) == '@') {
|
| + } else if (name.CodeUnitAt(i) == '@') {
|
| ASSERT(at_pos == len);
|
| at_pos = i;
|
| - } else if (name.CharAt(i) == '.') {
|
| + } else if (name.CodeUnitAt(i) == '.') {
|
| dot_pos = i;
|
| break;
|
| }
|
| @@ -168,7 +168,7 @@ static RawString* IdentifierPrettyName(const String& name) {
|
| // "_ReceivePortImpl@6be832b._internal@6be832b".
|
| at_pos = len;
|
| for (int i = dot_pos; i < name.Length(); i++) {
|
| - if (name.CharAt(i) == '@') {
|
| + if (name.CodeUnitAt(i) == '@') {
|
| ASSERT(at_pos == len);
|
| at_pos = i;
|
| }
|
| @@ -489,7 +489,7 @@ void Object::RegisterClass(const Class& cls,
|
| const String& name,
|
| const Library& lib) {
|
| ASSERT(name.Length() > 0);
|
| - ASSERT(name.CharAt(0) != '_');
|
| + ASSERT(name.CodeUnitAt(0) != '_');
|
| cls.set_name(name);
|
| lib.AddClass(cls);
|
| }
|
| @@ -499,7 +499,7 @@ void Object::RegisterPrivateClass(const Class& cls,
|
| const String& public_class_name,
|
| const Library& lib) {
|
| ASSERT(public_class_name.Length() > 0);
|
| - ASSERT(public_class_name.CharAt(0) == '_');
|
| + ASSERT(public_class_name.CodeUnitAt(0) == '_');
|
| String& str = String::Handle();
|
| str = lib.PrivateName(public_class_name);
|
| cls.set_name(str);
|
| @@ -2198,12 +2198,12 @@ static bool MatchesAccessorName(const String& name,
|
| return false;
|
| }
|
| for (intptr_t i = 0; i < prefix_length; i++) {
|
| - if (name.CharAt(i) != prefix[i]) {
|
| + if (name.CodeUnitAt(i) != static_cast<uint32_t>(prefix[i])) {
|
| return false;
|
| }
|
| }
|
| for (intptr_t i = 0, j = prefix_length; i < accessor_name_len; i++, j++) {
|
| - if (name.CharAt(j) != accessor_name.CharAt(i)) {
|
| + if (name.CodeUnitAt(j) != accessor_name.CodeUnitAt(i)) {
|
| return false;
|
| }
|
| }
|
| @@ -4462,13 +4462,13 @@ RawString* TokenStream::GenerateSource() const {
|
| bool is_raw_string = false;
|
| bool escape_characters = false;
|
| for (intptr_t i = 0; i < literal.Length(); i++) {
|
| - if (IsSpecialCharacter(literal.CharAt(i))) {
|
| + if (IsSpecialCharacter(literal.CodeUnitAt(i))) {
|
| escape_characters = true;
|
| }
|
| // TODO(4995): Temp solution for raw strings, this will break
|
| // if we saw a string that is not a raw string but has back slashes
|
| // in it.
|
| - if ((literal.CharAt(i) == '\\')) {
|
| + if ((literal.CodeUnitAt(i) == '\\')) {
|
| if ((next != Token::kINTERPOL_VAR) &&
|
| (next != Token::kINTERPOL_START) &&
|
| (prev != Token::kINTERPOL_VAR) &&
|
| @@ -4496,12 +4496,12 @@ RawString* TokenStream::GenerateSource() const {
|
| }
|
| } else if (curr == Token::kINTERPOL_VAR) {
|
| literals.Add(dollar);
|
| - if (literal.CharAt(0) == Scanner::kPrivateIdentifierStart) {
|
| + if (literal.CodeUnitAt(0) == Scanner::kPrivateIdentifierStart) {
|
| literal = String::SubString(literal, 0, literal.Length() - private_len);
|
| }
|
| literals.Add(literal);
|
| } else if (curr == Token::kIDENT) {
|
| - if (literal.CharAt(0) == Scanner::kPrivateIdentifierStart) {
|
| + if (literal.CodeUnitAt(0) == Scanner::kPrivateIdentifierStart) {
|
| literal = String::SubString(literal, 0, literal.Length() - private_len);
|
| }
|
| literals.Add(literal);
|
| @@ -5045,10 +5045,10 @@ RawString* Script::GetLine(intptr_t line_number) const {
|
| if ((current_line == line_number) && (line_start < 0)) {
|
| line_start = ix;
|
| }
|
| - if (src.CharAt(ix) == '\n') {
|
| + if (src.CodeUnitAt(ix) == '\n') {
|
| current_line++;
|
| - } else if (src.CharAt(ix) == '\r') {
|
| - if ((ix + 1 != src.Length()) && (src.CharAt(ix + 1) != '\n')) {
|
| + } else if (src.CodeUnitAt(ix) == '\r') {
|
| + if ((ix + 1 != src.Length()) && (src.CodeUnitAt(ix + 1) != '\n')) {
|
| current_line++;
|
| }
|
| } else {
|
| @@ -5075,7 +5075,7 @@ RawString* Script::GetSnippet(intptr_t from_line,
|
| intptr_t lookahead = 0;
|
| intptr_t snippet_start = -1;
|
| intptr_t snippet_end = -1;
|
| - char c = src.CharAt(lookahead);
|
| + char c = src.CodeUnitAt(lookahead);
|
| while (lookahead != length) {
|
| if (snippet_start == -1) {
|
| if ((line == from_line) && (column == from_column)) {
|
| @@ -5093,13 +5093,13 @@ RawString* Script::GetSnippet(intptr_t from_line,
|
| lookahead++;
|
| if (lookahead != length) {
|
| // Replace '\r' with '\n' and a sequence of '\r' '\n' with a single '\n'.
|
| - if (src.CharAt(lookahead) == '\r') {
|
| + if (src.CodeUnitAt(lookahead) == '\r') {
|
| c = '\n';
|
| - if (lookahead + 1 != length && src.CharAt(lookahead) == '\n') {
|
| + if (lookahead + 1 != length && src.CodeUnitAt(lookahead) == '\n') {
|
| lookahead++;
|
| }
|
| } else {
|
| - c = src.CharAt(lookahead);
|
| + c = src.CodeUnitAt(lookahead);
|
| }
|
| }
|
| }
|
| @@ -5499,13 +5499,13 @@ RawObject* Library::LookupLocalObject(const String& name) const {
|
| static bool ShouldBePrivate(const String& name) {
|
| return
|
| (name.Length() >= 1 &&
|
| - name.CharAt(0) == '_') ||
|
| + name.CodeUnitAt(0) == '_') ||
|
| (name.Length() >= 5 &&
|
| - (name.CharAt(4) == '_' &&
|
| - (name.CharAt(0) == 'g' || name.CharAt(0) == 's') &&
|
| - name.CharAt(1) == 'e' &&
|
| - name.CharAt(2) == 't' &&
|
| - name.CharAt(3) == ':'));
|
| + (name.CodeUnitAt(4) == '_' &&
|
| + (name.CodeUnitAt(0) == 'g' || name.CodeUnitAt(0) == 's') &&
|
| + name.CodeUnitAt(1) == 'e' &&
|
| + name.CodeUnitAt(2) == 't' &&
|
| + name.CodeUnitAt(3) == ':'));
|
| }
|
|
|
|
|
| @@ -9785,7 +9785,7 @@ intptr_t String::Hash(const String& str, intptr_t begin_index, intptr_t len) {
|
| ASSERT((begin_index + len) <= str.Length());
|
| StringHasher hasher;
|
| for (intptr_t i = 0; i < len; i++) {
|
| - hasher.Add(str.CharAt(begin_index + i));
|
| + hasher.Add(str.CodeUnitAt(begin_index + i));
|
| }
|
| return hasher.Finalize(String::kHashBits);
|
| }
|
| @@ -9817,21 +9817,39 @@ intptr_t String::Hash(const uint32_t* characters, intptr_t len) {
|
| }
|
|
|
|
|
| -int32_t String::CharAt(intptr_t index) const {
|
| +uint32_t String::CharAt(intptr_t index) const {
|
| intptr_t class_id = raw()->GetClassId();
|
| ASSERT(RawObject::IsStringClassId(class_id));
|
| NoGCScope no_gc;
|
| if (class_id == kOneByteStringCid) {
|
| - return *OneByteString::CharAddr(*this, index);
|
| + return OneByteString::CharAt(*this, index);
|
| }
|
| if (class_id == kTwoByteStringCid) {
|
| - return *TwoByteString::CharAddr(*this, index);
|
| + return TwoByteString::CharAt(*this, index);
|
| }
|
| if (class_id == kExternalOneByteStringCid) {
|
| - return *ExternalOneByteString::CharAddr(*this, index);
|
| + return ExternalOneByteString::CharAt(*this, index);
|
| }
|
| ASSERT(class_id == kExternalTwoByteStringCid);
|
| - return *ExternalTwoByteString::CharAddr(*this, index);
|
| + return ExternalTwoByteString::CharAt(*this, index);
|
| +}
|
| +
|
| +
|
| +uint32_t String::CodeUnitAt(intptr_t index) const {
|
| + intptr_t class_id = raw()->GetClassId();
|
| + ASSERT(RawObject::IsStringClassId(class_id));
|
| + NoGCScope no_gc;
|
| + if (class_id == kOneByteStringCid) {
|
| + return OneByteString::CodeUnitAt(*this, index);
|
| + }
|
| + if (class_id == kTwoByteStringCid) {
|
| + return TwoByteString::CodeUnitAt(*this, index);
|
| + }
|
| + if (class_id == kExternalOneByteStringCid) {
|
| + return ExternalOneByteString::CodeUnitAt(*this, index);
|
| + }
|
| + ASSERT(class_id == kExternalTwoByteStringCid);
|
| + return ExternalTwoByteString::CodeUnitAt(*this, index);
|
| }
|
|
|
|
|
| @@ -9883,12 +9901,17 @@ bool String::Equals(const char* str) const {
|
| // Lengths don't match.
|
| return false;
|
| }
|
| - int32_t ch;
|
| + uint32_t ch;
|
| intptr_t consumed = Utf8::Decode(reinterpret_cast<const uint8_t*>(str),
|
| len,
|
| &ch);
|
| - if (consumed == 0 || this->CharAt(i) != ch) {
|
| - return false;
|
| + if (consumed == 0) return false;
|
| +
|
| + if (ch <= Utf16::kMaxCodeUnit) {
|
| + if (this->CodeUnitAt(i) != ch) return false;
|
| + } else {
|
| + if (this->CharAt(i) != ch) return false;
|
| + i++;
|
| }
|
| str += consumed;
|
| len -= consumed;
|
| @@ -9904,7 +9927,7 @@ bool String::Equals(const uint8_t* characters, intptr_t len) const {
|
| }
|
|
|
| for (intptr_t i = 0; i < len; i++) {
|
| - if (this->CharAt(i) != characters[i]) {
|
| + if (this->CodeUnitAt(i) != characters[i]) {
|
| return false;
|
| }
|
| }
|
| @@ -9919,7 +9942,7 @@ bool String::Equals(const uint16_t* characters, intptr_t len) const {
|
| }
|
|
|
| for (intptr_t i = 0; i < len; i++) {
|
| - if (this->CharAt(i) != characters[i]) {
|
| + if (this->CodeUnitAt(i) != characters[i]) {
|
| return false;
|
| }
|
| }
|
| @@ -9934,9 +9957,11 @@ bool String::Equals(const uint32_t* characters, intptr_t len) const {
|
| }
|
|
|
| for (intptr_t i = 0; i < len; i++) {
|
| - if (this->CharAt(i) != static_cast<int32_t>(characters[i])) {
|
| + uint32_t c = this->CharAt(i);
|
| + if (c != characters[i]) {
|
| return false;
|
| }
|
| + if (c > Utf16::kMaxCodeUnit) i++;
|
| }
|
| return true;
|
| }
|
| @@ -9968,7 +9993,7 @@ bool String::StartsWith(const String& other) const {
|
| }
|
| intptr_t slen = other.Length();
|
| for (int i = 0; i < slen; i++) {
|
| - if (this->CharAt(i) != other.CharAt(i)) {
|
| + if (this->CodeUnitAt(i) != other.CodeUnitAt(i)) {
|
| return false;
|
| }
|
| }
|
| @@ -9996,7 +10021,7 @@ RawString* String::New(const uint8_t* utf8_array,
|
| intptr_t array_len,
|
| Heap::Space space) {
|
| Utf8::Type type;
|
| - intptr_t len = Utf8::CodePointCount(utf8_array, array_len, &type);
|
| + intptr_t len = Utf8::CodeUnitCount(utf8_array, array_len, &type);
|
| if (type == Utf8::kAscii) {
|
| const String& strobj = String::Handle(OneByteString::New(len, space));
|
| if (len > 0) {
|
| @@ -10274,7 +10299,7 @@ RawString* String::SubString(const String& str,
|
| intptr_t char_size = str.CharSize();
|
| if (char_size == kTwoByteChar) {
|
| for (intptr_t i = begin_index; i < begin_index + length; ++i) {
|
| - if (str.CharAt(i) > 0x7F) {
|
| + if (str.CodeUnitAt(i) > 0x7F) {
|
| is_one_byte_string = false;
|
| break;
|
| }
|
| @@ -10319,25 +10344,39 @@ RawString* String::Transform(int32_t (*mapping)(int32_t ch),
|
| Heap::Space space) {
|
| ASSERT(!str.IsNull());
|
| bool has_mapping = false;
|
| - int32_t dst_max = 0;
|
| + uint32_t dst_max = 0;
|
| intptr_t len = str.Length();
|
| + intptr_t out_len = 0;
|
| // TODO(cshapiro): assume a transform is required, rollback if not.
|
| - for (intptr_t i = 0; i < len; ++i) {
|
| - int32_t src = str.CharAt(i);
|
| - int32_t dst = mapping(src);
|
| + intptr_t i = 0;
|
| + for (; i < len; ++i) {
|
| + uint32_t src = str.CodeUnitAt(i);
|
| + if (Utf16::IsSurrogate(src)) break;
|
| + uint32_t dst = mapping(src);
|
| + if (src != dst) {
|
| + has_mapping = true;
|
| + }
|
| + dst_max = Utils::Maximum(dst_max, dst);
|
| + out_len += dst > Utf16::kMaxCodeUnit ? 2 : 1;
|
| + }
|
| + for (; i < len; ++i) {
|
| + uint32_t src = str.CharAt(i);
|
| + uint32_t dst = mapping(src);
|
| if (src != dst) {
|
| has_mapping = true;
|
| }
|
| dst_max = Utils::Maximum(dst_max, dst);
|
| + out_len += dst > Utf16::kMaxCodeUnit ? 2 : 1;
|
| + if (src > Utf16::kMaxCodeUnit) ++i;
|
| }
|
| if (!has_mapping) {
|
| return str.raw();
|
| }
|
| if (dst_max <= 0x7F) {
|
| - return OneByteString::Transform(mapping, str, space);
|
| + return OneByteString::Transform(mapping, str, out_len, space);
|
| }
|
| ASSERT(dst_max > 0x7F);
|
| - return TwoByteString::Transform(mapping, str, space);
|
| + return TwoByteString::Transform(mapping, str, out_len, space);
|
| }
|
|
|
|
|
| @@ -10544,14 +10583,18 @@ RawOneByteString* OneByteString::ConcatAll(const Array& strings,
|
|
|
| RawOneByteString* OneByteString::Transform(int32_t (*mapping)(int32_t ch),
|
| const String& str,
|
| + int out_length,
|
| Heap::Space space) {
|
| ASSERT(!str.IsNull());
|
| intptr_t len = str.Length();
|
| - const String& result = String::Handle(OneByteString::New(len, space));
|
| - for (intptr_t i = 0; i < len; ++i) {
|
| - int32_t ch = mapping(str.CharAt(i));
|
| - ASSERT(ch >= 0 && ch <= 0x7F);
|
| - *CharAddr(result, i) = ch;
|
| + const String& result =
|
| + String::Handle(OneByteString::New(out_length, space));
|
| + for (intptr_t i = 0, j = 0; i < len; ++i, j++) {
|
| + uint32_t old_ch = str.CharAt(i);
|
| + if (old_ch > Utf16::kMaxCodeUnit) i++;
|
| + uint32_t ch = mapping(old_ch);
|
| + ASSERT(ch <= 0x7Fu);
|
| + *CharAddr(result, j) = ch;
|
| }
|
| return OneByteString::raw(result);
|
| }
|
| @@ -10635,9 +10678,11 @@ RawTwoByteString* TwoByteString::New(intptr_t utf16_len,
|
| NoGCScope no_gc;
|
| intptr_t j = 0;
|
| for (intptr_t i = 0; i < array_len; ++i) {
|
| - if (utf32_array[i] > 0xffff) {
|
| + uint32_t code_point = utf32_array[i];
|
| + if (code_point > Utf16::kMaxCodeUnit) {
|
| ASSERT(j < (utf16_len - 1));
|
| - Utf8::ConvertUTF32ToUTF16(utf32_array[i], CharAddr(result, j));
|
| + *CharAddr(result, j) = Utf16::LeadFromCodePoint(code_point);
|
| + *CharAddr(result, j + 1) = Utf16::TrailFromCodePoint(code_point);
|
| j += 2;
|
| } else {
|
| ASSERT(j < utf16_len);
|
| @@ -10691,14 +10736,25 @@ RawTwoByteString* TwoByteString::ConcatAll(const Array& strings,
|
|
|
| RawTwoByteString* TwoByteString::Transform(int32_t (*mapping)(int32_t ch),
|
| const String& str,
|
| + int out_length,
|
| Heap::Space space) {
|
| ASSERT(!str.IsNull());
|
| intptr_t len = str.Length();
|
| - const String& result = String::Handle(TwoByteString::New(len, space));
|
| - for (intptr_t i = 0; i < len; ++i) {
|
| - int32_t ch = mapping(str.CharAt(i));
|
| - ASSERT(ch >= 0 && ch <= 0xFFFF);
|
| - *CharAddr(result, i) = ch;
|
| + const String& result =
|
| + String::Handle(TwoByteString::New(out_length, space));
|
| + for (intptr_t i = 0, j = 0; i < len; ++i, j++) {
|
| + uint32_t old_ch = str.CharAt(i);
|
| + if (old_ch > Utf16::kMaxCodeUnit) i++;
|
| + uint32_t ch = mapping(old_ch);
|
| + ASSERT(ch <= Utf16::kMaxCodePoint);
|
| + if (ch <= Utf16::kMaxCodeUnit) {
|
| + *CharAddr(result, j) = ch;
|
| + } else {
|
| + *CharAddr(result, j) = Utf16::LeadFromCodePoint(ch);
|
| + *CharAddr(result, j + 1) = Utf16::TrailFromCodePoint(ch);
|
| + ++j;
|
| + }
|
| + ASSERT(j <= out_length);
|
| }
|
| return TwoByteString::raw(result);
|
| }
|
|
|