Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(489)

Unified Diff: runtime/vm/object.h

Issue 11368138: Add some support for the code-point code-unit distinction. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: runtime/vm/object.h
diff --git a/runtime/vm/object.h b/runtime/vm/object.h
index 2836dda969ab3435a680294b02d436ae91c882f3..97d4f5c708d0d5e7ec5f4fee602326794fa4f2ed 100644
--- a/runtime/vm/object.h
+++ b/runtime/vm/object.h
@@ -17,6 +17,7 @@
#include "vm/os.h"
#include "vm/raw_object.h"
#include "vm/scanner.h"
+#include "vm/unicode.h"
namespace dart {
@@ -3684,7 +3685,8 @@ class String : public Instance {
static intptr_t Hash(const uint16_t* characters, intptr_t len);
static intptr_t Hash(const uint32_t* characters, intptr_t len);
- int32_t CharAt(intptr_t index) const;
+ uint32_t CharAt(intptr_t index) const;
+ uint32_t CodeUnitAt(intptr_t index) const;
intptr_t CharSize() const;
@@ -3850,7 +3852,11 @@ class String : public Instance {
class OneByteString : public AllStatic {
public:
- static int32_t CharAt(const String& str, intptr_t index) {
+ static uint32_t CharAt(const String& str, intptr_t index) {
+ return *CharAddr(str, index);
+ }
+
+ static uint32_t CodeUnitAt(const String& str, intptr_t index) {
return *CharAddr(str, index);
}
@@ -3907,6 +3913,7 @@ class OneByteString : public AllStatic {
static RawOneByteString* Transform(int32_t (*mapping)(int32_t ch),
const String& str,
+ int out_length,
Heap::Space space);
static const ClassId kClassId = kOneByteStringCid;
@@ -3944,7 +3951,19 @@ class OneByteString : public AllStatic {
class TwoByteString : public AllStatic {
public:
- static int32_t CharAt(const String& str, intptr_t index) {
+ static uint32_t CharAt(const String& str, intptr_t index) {
+ int32_t unit = *CharAddr(str, index);
+ // For non-surrogate values or incorrect trailing surrogates we just return
+ // the value.
+ if (!Utf16::IsLeadSurrogate(unit)) return unit;
+
+ // If the string ends with a lead surrogate we just return that.
+ if (index + 1 >= str.Length()) return unit;
+
+ return Utf16::CodePointFromCodeUnits(unit, *CharAddr(str, index + 1));
+ }
+
+ static uint32_t CodeUnitAt(const String& str, intptr_t index) {
return *CharAddr(str, index);
}
siva 2012/11/08 19:06:47 I don't mind changing the name 'CharAt' to 'CodeU
erikcorry 2012/11/08 22:09:34 Do you want to move the C++ support for toUpperCas
siva 2012/11/09 02:40:42 We could fix String::Transform/TwoByteString::Tran
erikcorry 2012/11/15 13:28:25 I have fixed the UTF8 and the Transform (toUpper/L
@@ -3990,6 +4009,7 @@ class TwoByteString : public AllStatic {
static RawTwoByteString* Transform(int32_t (*mapping)(int32_t ch),
const String& str,
+ int out_length,
Heap::Space space);
static RawTwoByteString* null() {
@@ -4028,7 +4048,11 @@ class TwoByteString : public AllStatic {
class ExternalOneByteString : public AllStatic {
public:
- static int32_t CharAt(const String& str, intptr_t index) {
+ static uint32_t CharAt(const String& str, intptr_t index) {
+ return *CharAddr(str, index);
+ }
+
+ static uint32_t CodeUnitAt(const String& str, intptr_t index) {
return *CharAddr(str, index);
}
@@ -4094,7 +4118,19 @@ class ExternalOneByteString : public AllStatic {
class ExternalTwoByteString : public AllStatic {
public:
- static int32_t CharAt(const String& str, intptr_t index) {
+ static uint32_t CharAt(const String& str, intptr_t index) {
+ int32_t unit = *CharAddr(str, index);
floitsch 2012/11/08 15:28:21 uint32_t unit =
erikcorry 2012/11/15 13:28:25 This code is gone.
+ // For non-surrogate values or incorrect trailing surrogates we just return
+ // the value.
+ if (!Utf16::IsLeadSurrogate(unit)) return unit;
+
+ // If the string ends with a lead surrogate we just return that.
+ if (index + 1 >= str.Length()) return unit;
+
+ return Utf16::CodePointFromCodeUnits(unit, *CharAddr(str, index + 1));
+ }
+
+ static uint32_t CodeUnitAt(const String& str, intptr_t index) {
return *CharAddr(str, index);
}
@@ -5858,7 +5894,7 @@ bool String::Equals(const String& str,
return false; // Lengths don't match.
}
for (intptr_t i = 0; i < len; i++) {
- if (this->CharAt(i) != str.CharAt(begin_index + i)) {
+ if (this->CodeUnitAt(i) != str.CodeUnitAt(begin_index + i)) {
return false;
}
}

Powered by Google App Engine
This is Rietveld 408576698