| Index: runtime/vm/unicode.h
|
| diff --git a/runtime/vm/unicode.h b/runtime/vm/unicode.h
|
| index 03a4b29d879898c2c83f06aac4a175d4729db209..6e6e1c038abbc1d50ce332c26d7b3340f22478a6 100644
|
| --- a/runtime/vm/unicode.h
|
| +++ b/runtime/vm/unicode.h
|
| @@ -1,4 +1,4 @@
|
| -// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
|
| +// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
|
| // for details. All rights reserved. Use of this source code is governed by a
|
| // BSD-style license that can be found in the LICENSE file.
|
|
|
| @@ -29,7 +29,8 @@ class Utf8 : AllStatic {
|
| intptr_t array_len,
|
| Type* type);
|
|
|
| - // Returns true if 'utf8_array' is a valid UTF-8 string.
|
| + // Returns true if 'utf8_array' is a valid UTF-8 string. UTF-8 encoded UTF-16
|
| + // surrogate code units are allowed.
|
| static bool IsValid(const uint8_t* utf8_array, intptr_t array_len);
|
|
|
| static intptr_t Length(int32_t ch);
|
| @@ -41,7 +42,6 @@ class Utf8 : AllStatic {
|
| static intptr_t Decode(const uint8_t* utf8_array,
|
| intptr_t array_len,
|
| int32_t* ch);
|
| -
|
| static bool DecodeToLatin1(const uint8_t* utf8_array,
|
| intptr_t array_len,
|
| uint8_t* dst,
|
| @@ -73,6 +73,36 @@ class Utf16 : AllStatic {
|
|
|
| static const int32_t kSurrogateOffset = (0x10000 - (0xD800 << 10) - 0xDC00);
|
|
|
| + class CodePointIterator {  // Cursor over a caller-supplied array of UTF-16 code units; exposes one int32_t value at a time via Current().
|
| + public:
|
| + CodePointIterator(const uint16_t* utf16_array, intptr_t array_len)  // Stores the pointer and length as given; nothing is copied or read here.
|
| + : utf16_array_(utf16_array),
|
| + array_len_(array_len),
|
| + index_(-1),  // -1 marks "not positioned yet"; Current() asserts against this state.
|
| + ch_(-1) {  // -1 is the placeholder value while no element has been produced.
|
| + }
|
| +
|
| + int32_t Current() const {  // Returns the value held in ch_; only valid while index_ is inside [0, array_len_).
|
| + ASSERT(index_ >= 0);  // Fails right after construction or Reset(), when index_ is still -1.
|
| + ASSERT(index_ < array_len_);  // Fails if index_ has reached or passed the end of the array.
|
| + return ch_;
|
| + }
|
| +
|
| + bool Next();  // Declared only; defined outside this header chunk. NOTE(review): presumably advances index_/ch_ to the next code point and returns false at the end -- confirm against the definition.
|
| +
|
| + void Reset() {  // Puts the iterator back into the same unpositioned state the constructor sets up.
|
| + index_ = -1;
|
| + ch_ = -1;
|
| + }
|
| +
|
| + private:
|
| + const uint16_t* utf16_array_;  // Input code units; set once in the constructor and never written through (pointer to const).
|
| + intptr_t array_len_;  // Length passed to the constructor; upper bound checked by Current().
|
| + intptr_t index_;  // Current position, or -1 when unpositioned.
|
| + int32_t ch_;  // Value returned by Current(), or -1 when unpositioned.
|
| + DISALLOW_IMPLICIT_CONSTRUCTORS(CodePointIterator);  // Project macro defined elsewhere; NOTE(review): by its name it suppresses the default/copy constructors -- confirm in the macro's definition.
|
| + };
|
| +
|
| // Returns the length of the code point in UTF-16 code units.
|
| static intptr_t Length(int32_t ch) {
|
| return (ch <= kMaxBmpCodepoint) ? 1 : 2;
|
|
|