Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(31)

Unified Diff: runtime/lib/string.cc

Issue 11368138: Add some support for the code-point code-unit distinction. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: runtime/lib/string.cc
diff --git a/runtime/lib/string.cc b/runtime/lib/string.cc
index 7707ed63abcb199f12128f13646516d188b0a0dc..8d7a09e0369154793b954063def8ebdd21544b61 100644
--- a/runtime/lib/string.cc
+++ b/runtime/lib/string.cc
@@ -29,15 +29,16 @@ DEFINE_NATIVE_ENTRY(StringBase_createFromCodePoints, 1) {
args.Add(&index_object);
Exceptions::ThrowByType(Exceptions::kArgument, args);
}
- intptr_t value = Smi::Cast(index_object).Value();
- if (value < 0) {
+ uint32_t value = Smi::Cast(index_object).Value();
floitsch 2012/11/08 15:28:21 I don't think this works. On 64-bit machines a Smi […]
erikcorry 2012/11/15 13:28:25 Done.
+ if (Utf16::IsSurrogate(value) ||
+ value > Utf16::kMaxCodePoint) {
GrowableArray<const Object*> args;
Exceptions::ThrowByType(Exceptions::kArgument, args);
} else {
if (value > 0x7F) {
is_one_byte_string = false;
}
- if (value > 0xFFFF) {
+ if (value > Utf16::kMaxCodeUnit) {
utf16_len += 1;
}
}
@@ -50,6 +51,41 @@ DEFINE_NATIVE_ENTRY(StringBase_createFromCodePoints, 1) {
}
+DEFINE_NATIVE_ENTRY(StringBase_createFromCodeUnits, 1) {
+ GET_NATIVE_ARGUMENT(Array, a, arguments->At(0));
+ // TODO(srdjan): Check that parameterized type is an int.
+ Zone* zone = isolate->current_zone();
+ intptr_t array_len = a.Length();
+
+ // Unbox the array and determine the maximum element width.
+ bool is_one_byte_string = true;
+ uint32_t* utf32_array = zone->Alloc<uint32_t>(array_len);
+ Object& index_object = Object::Handle(isolate);
+ for (intptr_t i = 0; i < array_len; i++) {
+ index_object = a.At(i);
+ if (!index_object.IsSmi()) {
+ GrowableArray<const Object*> args;
+ args.Add(&index_object);
+ Exceptions::ThrowByType(Exceptions::kArgument, args);
+ }
+ uint32_t value = Smi::Cast(index_object).Value();
floitsch 2012/11/08 15:28:21 ditto.
erikcorry 2012/11/15 13:28:25 This code is now gone.
+ if (value > Utf16::kMaxCodeUnit) {
+ GrowableArray<const Object*> args;
+ Exceptions::ThrowByType(Exceptions::kArgument, args);
+ } else {
+ if (value > 0x7F) {
+ is_one_byte_string = false;
+ }
+ }
+ utf32_array[i] = value;
+ }
+ if (is_one_byte_string) {
+ return OneByteString::New(utf32_array, array_len, Heap::kNew);
+ }
+ return TwoByteString::New(array_len, utf32_array, array_len, Heap::kNew);
+}
+
+
DEFINE_NATIVE_ENTRY(StringBase_substringUnchecked, 3) {
GET_NATIVE_ARGUMENT(String, receiver, arguments->At(0));
GET_NATIVE_ARGUMENT(Smi, start_obj, arguments->At(1));
@@ -97,20 +133,56 @@ static int32_t StringValueAt(const String& str, const Integer& index) {
}
+static int32_t StringCodeUnitAt(const String& str, const Integer& index) {
+ if (index.IsSmi()) {
+ Smi& smi = Smi::Handle();
+ smi ^= index.raw();
+ int32_t index = smi.Value();
+ if ((index < 0) || (index >= str.Length())) {
+ GrowableArray<const Object*> arguments;
+ arguments.Add(&smi);
+ Exceptions::ThrowByType(Exceptions::kRange, arguments);
+ }
+ return str.CodeUnitAt(index);
+ } else {
+ // An index larger than Smi is always illegal.
+ GrowableArray<const Object*> arguments;
+ arguments.Add(&index);
+ Exceptions::ThrowByType(Exceptions::kRange, arguments);
+ return 0;
+ }
+}
+
+
DEFINE_NATIVE_ENTRY(String_charAt, 2) {
const String& receiver = String::CheckedHandle(arguments->At(0));
GET_NATIVE_ARGUMENT(Integer, index, arguments->At(1));
uint32_t value = StringValueAt(receiver, index);
- ASSERT(value <= 0x10FFFF);
- return Symbols::New(&value, 1);
+ if (value <= Utf16::kMaxCodeUnit) {
floitsch 2012/11/08 15:28:21 As discussed: This should just return a String wit[…]
erikcorry 2012/11/15 13:28:25 Done.
+ return Symbols::New(&value, 1);
cshapiro 2012/11/09 04:43:57 This is generally not a good idea as the intern ta[…]
erikcorry 2012/11/12 20:22:17 It already interned character strings, I did not c[…]
cshapiro 2012/11/12 21:51:13 Yes, what you say is all true. This change is an […]
erikcorry 2012/11/15 13:28:25 I fixed it so it only interns ASCII characters.
+ }
+ ASSERT(value <= Utf16::kMaxCodePoint);
+ uint32_t values[2];
+ values[0] = Utf16::LeadFromCodePoint(value);
+ values[1] = Utf16::TrailFromCodePoint(value);
+ return Symbols::New(&values[0], 2);
}
+
DEFINE_NATIVE_ENTRY(String_charCodeAt, 2) {
const String& receiver = String::CheckedHandle(arguments->At(0));
GET_NATIVE_ARGUMENT(Integer, index, arguments->At(1));
- int32_t value = StringValueAt(receiver, index);
- ASSERT(value >= 0);
- ASSERT(value <= 0x10FFFF);
+ uint32_t value = StringValueAt(receiver, index);
+ ASSERT(value <= Utf16::kMaxCodePoint);
+ return Smi::New(value);
+}
+
+
+DEFINE_NATIVE_ENTRY(String_codeUnitAt, 2) {
+ const String& receiver = String::CheckedHandle(arguments->At(0));
+ GET_NATIVE_ARGUMENT(Integer, index, arguments->At(1));
+ uint32_t value = StringCodeUnitAt(receiver, index);
+ ASSERT(value <= Utf16::kMaxCodeUnit);
return Smi::New(value);
}

Powered by Google App Engine
This is Rietveld 408576698