Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(272)

Side by Side Diff: runtime/vm/dart_api_message.cc

Issue 11280150: Add support for surrogates when serializing and deserializing for native ports (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Added Utf16::CodePointIterator Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | runtime/vm/snapshot_test.cc » ('j') | runtime/vm/unicode.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/dart_api_message.h" 5 #include "vm/dart_api_message.h"
6 #include "vm/object.h" 6 #include "vm/object.h"
7 #include "vm/snapshot_ids.h" 7 #include "vm/snapshot_ids.h"
8 #include "vm/symbols.h" 8 #include "vm/symbols.h"
9 #include "vm/unicode.h" 9 #include "vm/unicode.h"
10 10
(...skipping 371 matching lines...) Expand 10 before | Expand all | Expand 10 after
382 ::free(latin1); 382 ::free(latin1);
383 return object; 383 return object;
384 } 384 }
385 case kTwoByteStringCid: { 385 case kTwoByteStringCid: {
386 intptr_t len = ReadSmiValue(); 386 intptr_t len = ReadSmiValue();
387 intptr_t hash = ReadSmiValue(); 387 intptr_t hash = ReadSmiValue();
388 USE(hash); 388 USE(hash);
389 uint16_t *utf16 = 389 uint16_t *utf16 =
390 reinterpret_cast<uint16_t*>(::malloc(len * sizeof(uint16_t))); 390 reinterpret_cast<uint16_t*>(::malloc(len * sizeof(uint16_t)));
391 intptr_t utf8_len = 0; 391 intptr_t utf8_len = 0;
392 // Read all the UTF-16 code units.
392 for (intptr_t i = 0; i < len; i++) { 393 for (intptr_t i = 0; i < len; i++) {
393 utf16[i] = Read<uint16_t>(); 394 utf16[i] = Read<uint16_t>();
394 // TODO(sgjesse): Check for surrogate pairs. 395 }
395 utf8_len += Utf8::Length(utf16[i]); 396 // Calculate the UTF-8 length.
397 Utf16::CodePointIterator it(utf16, len);
siva 2012/11/27 03:00:25 If you get invalid characters here it.Next() could
Søren Gjesse 2012/11/27 11:35:54 There are no invalid characters in a UTF-16 seque
398 while (it.Next()) {
399 utf8_len += Utf8::Length(it.Current());
396 } 400 }
397 Dart_CObject* object = AllocateDartCObjectString(utf8_len); 401 Dart_CObject* object = AllocateDartCObjectString(utf8_len);
398 AddBackRef(object_id, object, kIsDeserialized); 402 AddBackRef(object_id, object, kIsDeserialized);
399 char* p = object->value.as_string; 403 char* p = object->value.as_string;
400 for (intptr_t i = 0; i < len; i++) { 404 Utf16::CodePointIterator it2(utf16, len);
siva 2012/11/27 03:00:25 Would it make sense to have a reset method on the
Søren Gjesse 2012/11/27 11:35:54 Good point, added Reset() here and for String::Code
401 // TODO(sgjesse): Check for surrogate pairs. 405 while (it2.Next()) {
402 p += Utf8::Encode(utf16[i], p); 406 p += Utf8::Encode(it2.Current(), p);
403 } 407 }
404 *p = '\0'; 408 *p = '\0';
405 ASSERT(p == (object->value.as_string + utf8_len)); 409 ASSERT(p == (object->value.as_string + utf8_len));
406 ::free(utf16); 410 ::free(utf16);
407 return object; 411 return object;
408 } 412 }
409 case kUint8ArrayCid: { 413 case kUint8ArrayCid: {
410 intptr_t len = ReadSmiValue(); 414 intptr_t len = ReadSmiValue();
411 Dart_CObject* object = AllocateDartCObjectUint8Array(len); 415 Dart_CObject* object = AllocateDartCObjectUint8Array(len);
412 AddBackRef(object_id, object, kIsDeserialized); 416 AddBackRef(object_id, object, kIsDeserialized);
(...skipping 368 matching lines...) Expand 10 before | Expand all | Expand 10 after
781 // Write out the class and tags information. 785 // Write out the class and tags information.
782 WriteIndexedObject(kDoubleCid); 786 WriteIndexedObject(kDoubleCid);
783 WriteIntptrValue(0); 787 WriteIntptrValue(0);
784 // Write double value. 788 // Write double value.
785 Write<double>(object->value.as_double); 789 Write<double>(object->value.as_double);
786 break; 790 break;
787 case Dart_CObject::kString: { 791 case Dart_CObject::kString: {
788 const uint8_t* utf8_str = 792 const uint8_t* utf8_str =
789 reinterpret_cast<const uint8_t*>(object->value.as_string); 793 reinterpret_cast<const uint8_t*>(object->value.as_string);
790 intptr_t utf8_len = strlen(object->value.as_string); 794 intptr_t utf8_len = strlen(object->value.as_string);
791 if (!Utf8::IsValid(utf8_str, utf8_len)) { 795 if (!Utf8::IsValidAllowSurrogates(utf8_str, utf8_len)) {
siva 2012/11/27 03:00:25 I am not sure I understand the need for this to be
Søren Gjesse 2012/11/27 11:35:54 The current Utf8::IsValid does not allow for Utf8
siva 2012/11/28 03:28:23 I was under the impression that we allow for Utf8
792 return false; 796 return false;
793 } 797 }
794 798
795 Utf8::Type type; 799 Utf8::Type type;
796 intptr_t len = Utf8::CodePointCount(utf8_str, utf8_len, &type); 800 intptr_t len = Utf8::CodePointCount(utf8_str, utf8_len, &type);
797 801
798 // Write out the serialization header value for this object. 802 // Write out the serialization header value for this object.
799 WriteInlinedHeader(object); 803 WriteInlinedHeader(object);
800 // Write out the class and tags information. 804 // Write out the class and tags information.
801 WriteIndexedObject(type == Utf8::kLatin1 ? kOneByteStringCid 805 WriteIndexedObject(type == Utf8::kLatin1 ? kOneByteStringCid
802 : kTwoByteStringCid); 806 : kTwoByteStringCid);
803 WriteIntptrValue(0); 807 WriteIntptrValue(0);
804 // Write string length, hash and content 808 // Write string length, hash and content
805 WriteSmi(len); 809 WriteSmi(len);
806 WriteSmi(0); // TODO(sgjesse): Hash - not written. 810 WriteSmi(0); // TODO(sgjesse): Hash - not written.
807 if (type == Utf8::kLatin1) { 811 if (type == Utf8::kLatin1) {
808 uint8_t* latin1_str = 812 uint8_t* latin1_str =
809 reinterpret_cast<uint8_t*>(::malloc(len * sizeof(uint8_t))); 813 reinterpret_cast<uint8_t*>(::malloc(len * sizeof(uint8_t)));
810 Utf8::DecodeToLatin1(utf8_str, utf8_len, latin1_str, len); 814 bool success = Utf8::DecodeToLatin1(utf8_str,
815 utf8_len,
816 latin1_str,
817 len);
818 ASSERT(success);
811 for (intptr_t i = 0; i < len; i++) { 819 for (intptr_t i = 0; i < len; i++) {
812 Write<uint8_t>(latin1_str[i]); 820 Write<uint8_t>(latin1_str[i]);
813 } 821 }
814 ::free(latin1_str); 822 ::free(latin1_str);
815 } else { 823 } else {
816 // TODO(sgjesse): Make sure surrogate pairs are handled. 824 // TODO(sgjesse): Make sure surrogate pairs are handled.
817 uint16_t* utf16_str = 825 uint16_t* utf16_str =
818 reinterpret_cast<uint16_t*>(::malloc(len * sizeof(uint16_t))); 826 reinterpret_cast<uint16_t*>(::malloc(len * sizeof(uint16_t)));
819 Utf8::DecodeToUTF16(utf8_str, utf8_len, utf16_str, len); 827 bool success = Utf8::DecodeToUTF16AllowSurrogates(utf8_str,
828 utf8_len,
829 utf16_str,
830 len);
831 ASSERT(success);
820 for (intptr_t i = 0; i < len; i++) { 832 for (intptr_t i = 0; i < len; i++) {
821 Write<uint16_t>(utf16_str[i]); 833 Write<uint16_t>(utf16_str[i]);
822 } 834 }
823 ::free(utf16_str); 835 ::free(utf16_str);
824 } 836 }
825 break; 837 break;
826 } 838 }
827 case Dart_CObject::kUint8Array: { 839 case Dart_CObject::kUint8Array: {
828 // Write out the serialization header value for this object. 840 // Write out the serialization header value for this object.
829 WriteInlinedHeader(object); 841 WriteInlinedHeader(object);
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
882 if (!success) { 894 if (!success) {
883 UnmarkAllCObjects(object); 895 UnmarkAllCObjects(object);
884 return false; 896 return false;
885 } 897 }
886 } 898 }
887 UnmarkAllCObjects(object); 899 UnmarkAllCObjects(object);
888 return true; 900 return true;
889 } 901 }
890 902
891 } // namespace dart 903 } // namespace dart
OLDNEW
« no previous file with comments | « no previous file | runtime/vm/snapshot_test.cc » ('j') | runtime/vm/unicode.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698