Chromium Code Reviews| Index: runtime/vm/unicode.h |
| diff --git a/runtime/vm/unicode.h b/runtime/vm/unicode.h |
| index 28beaad51592d6233b4db817b145260d57acdccb..d582d94b502d68b57d0e4f62f789f365f89f26ad 100644 |
| --- a/runtime/vm/unicode.h |
| +++ b/runtime/vm/unicode.h |
| @@ -12,6 +12,55 @@ namespace dart { |
| class String; |
| +class Utf16 : AllStatic { |
| + public: |
| + static const int32_t kMaxCodeUnit = 0xFFFF; |
| + static const int32_t kMaxCodePoint = 0x10FFFF; |
| + |
| + // Returns true if |c| is a UTF-16 lead (high) surrogate code unit, i.e. |
| + // lies in [kLeadBase, kLeadEnd]. kLeadEnd (0xDBFF) is itself a lead |
| + // surrogate, so the upper bound must be inclusive. |
| + static bool IsLeadSurrogate(int32_t c) { |
| + return c >= kLeadBase && c <= kLeadEnd; |
| + } |
| + |
| + // Returns true if |c| is a UTF-16 trail (low) surrogate code unit, i.e. |
| + // lies in [kTrailBase, kTrailEnd]. kTrailEnd (0xDFFF) is itself a trail |
| + // surrogate, so the upper bound must be inclusive. |
| + static bool IsTrailSurrogate(int32_t c) { |
| + return c >= kTrailBase && c <= kTrailEnd; |
| + } |
| + |
| + // Returns true if |c| is any surrogate code unit (lead or trail): every bit |
| + // above the low 11 must match the corresponding bit of 0xD800. |
| + static bool IsSurrogate(int32_t c) { |
| + const uint32_t high_bits = static_cast<uint32_t>(c) & 0xFFFFF800u; |
| + return high_bits == 0xD800u; |
| + } |
| + |
| + static int32_t CodePointFromCodeUnits(int32_t lead, int32_t trail) { |
|
Søren Gjesse
2012/11/19 14:18:36
Assert that lead is a lead surrogate and that trail is a trail surrogate.
|
| + return kSurrogateEncodingBase + |
| + ((lead & kSurrogateMask) << 10) + (trail & kSurrogateMask); |
| + } |
| + |
| + // Computes the lead surrogate code unit for |code_point|, which must be at |
| + // least kSurrogateEncodingBase (checked by ASSERT). |
| + static int32_t LeadFromCodePoint(int32_t code_point) { |
| + ASSERT(code_point >= kSurrogateEncodingBase); |
| + const int32_t offset = code_point - kSurrogateEncodingBase; |
| + const int32_t lead_bits = (offset >> 10) & kSurrogateMask; |
| + return kLeadBase + lead_bits; |
| + } |
| + |
| + // Computes the trail surrogate code unit for |code_point|, which must be at |
| + // least kSurrogateEncodingBase (checked by ASSERT): kTrailBase plus the bits |
| + // of |code_point| selected by kSurrogateMask. |
| + static int32_t TrailFromCodePoint(int32_t code_point) { |
| + ASSERT(code_point >= kSurrogateEncodingBase); |
| + const int32_t trail_bits = code_point & kSurrogateMask; |
| + return kTrailBase + trail_bits; |
| + } |
| + |
| + // Gets the 21 bit Unicode code point at the given index in a string. If the |
| + // returned value is greater than kMaxCodeUnit then the next position of the |
| + // string encodes a trail surrogate and should be skipped on iteration. May |
| + // return individual surrogate values if they are not part of a pair. |
| + static int32_t CodePointAt(const String& str, int index); |
| + |
| + private: |
| + // Surrogate code unit ranges. Note that the *End constants hold the last |
| + // value of each range (inclusive), not one past it. |
| + static const int32_t kLeadBase = 0xD800; |
| + static const int32_t kLeadEnd = 0xDBFF; |
| + static const int32_t kTrailBase = 0xDC00; |
| + static const int32_t kTrailEnd = 0xDFFF; |
| + // Selects the low 10 bits; the conversions above shift by 10 to pair it |
| + // with the lead half. |
| + static const int32_t kSurrogateMask = 0x3FF; |
| + // Offset added/subtracted when converting between a code point and its |
| + // lead code unit. |
| + static const int32_t kSurrogateEncodingBase = 0x10000; |
| +}; |
| + |
| + |
| class Utf8 : AllStatic { |
| public: |
| enum Type { |
| @@ -24,14 +73,12 @@ class Utf8 : AllStatic { |
| static const intptr_t kMaxTwoByteChar = 0x7FF; |
| static const intptr_t kMaxThreeByteChar = 0xFFFF; |
| static const intptr_t kMaxFourByteChar = 0x10FFFF; |
| - static const intptr_t kMaxBmpCodepoint = 0xffff; |
| - static const int32_t kLeadOffset = (0xD800 - (0x10000 >> 10)); |
| - static const int32_t kSurrogateOffset = (0x10000 - (0xD800 << 10) - 0xDC00); |
| - static void ConvertUTF32ToUTF16(int32_t codepoint, uint16_t* dst); |
| - static intptr_t CodePointCount(const uint8_t* utf8_array, |
| - intptr_t array_len, |
| - Type* type); |
| + static const int32_t kInvalidCodePoint = -1; |
| + |
| + static intptr_t CodeUnitCount(const uint8_t* utf8_array, |
| + intptr_t array_len, |
| + Type* type); |
| // Returns true if 'utf8_array' is a valid UTF-8 string. |
| static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); |
| @@ -56,10 +103,10 @@ class Utf8 : AllStatic { |
| intptr_t len); |
| static bool DecodeToUTF32(const uint8_t* utf8_array, |
| intptr_t array_len, |
| - uint32_t* dst, |
| + int32_t* dst, |
| intptr_t len); |
| static bool DecodeCStringToUTF32(const char* str, |
| - uint32_t* dst, |
| + int32_t* dst, |
| intptr_t len) { |
| ASSERT(str != NULL); |
| intptr_t array_len = strlen(str); |