| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef BASE_I18N_CHAR_ITERATOR_H_ | 5 #ifndef BASE_I18N_CHAR_ITERATOR_H_ |
| 6 #define BASE_I18N_CHAR_ITERATOR_H_ | 6 #define BASE_I18N_CHAR_ITERATOR_H_ |
| 7 | 7 |
| 8 #include <stddef.h> |
| 9 #include <stdint.h> |
| 10 |
| 8 #include <string> | 11 #include <string> |
| 9 | 12 |
| 10 #include "base/basictypes.h" | |
| 11 #include "base/i18n/base_i18n_export.h" | 13 #include "base/i18n/base_i18n_export.h" |
| 14 #include "base/macros.h" |
| 12 #include "base/strings/string16.h" | 15 #include "base/strings/string16.h" |
| 16 #include "build/build_config.h" |
| 13 | 17 |
| 14 // The CharIterator classes iterate through the characters in UTF8 and | 18 // The CharIterator classes iterate through the characters in UTF8 and |
| 15 // UTF16 strings. Example usage: | 19 // UTF16 strings. Example usage: |
| 16 // | 20 // |
| 17 // UTF8CharIterator iter(&str); | 21 // UTF8CharIterator iter(&str); |
| 18 // while (!iter.end()) { | 22 // while (!iter.end()) { |
| 19 // VLOG(1) << iter.get(); | 23 // VLOG(1) << iter.get(); |
| 20 // iter.Advance(); | 24 // iter.Advance(); |
| 21 // } | 25 // } |
| 22 | 26 |
| 23 #if defined(OS_WIN) | 27 #if defined(OS_WIN) |
| 24 typedef unsigned char uint8_t; | 28 typedef unsigned char uint8_t; |
| 25 #endif | 29 #endif |
| 26 | 30 |
| 27 namespace base { | 31 namespace base { |
| 28 namespace i18n { | 32 namespace i18n { |
| 29 | 33 |
| 30 class BASE_I18N_EXPORT UTF8CharIterator { | 34 class BASE_I18N_EXPORT UTF8CharIterator { |
| 31 public: | 35 public: |
| 32 // Requires |str| to live as long as the UTF8CharIterator does. | 36 // Requires |str| to live as long as the UTF8CharIterator does. |
| 33 explicit UTF8CharIterator(const std::string* str); | 37 explicit UTF8CharIterator(const std::string* str); |
| 34 ~UTF8CharIterator(); | 38 ~UTF8CharIterator(); |
| 35 | 39 |
| 36 // Return the starting array index of the current character within the | 40 // Return the starting array index of the current character within the |
| 37 // string. | 41 // string. |
| 38 int32 array_pos() const { return array_pos_; } | 42 int32_t array_pos() const { return array_pos_; } |
| 39 | 43 |
| 40 // Return the logical index of the current character, independent of the | 44 // Return the logical index of the current character, independent of the |
| 41 // number of bytes each character takes. | 45 // number of bytes each character takes. |
| 42 int32 char_pos() const { return char_pos_; } | 46 int32_t char_pos() const { return char_pos_; } |
| 43 | 47 |
| 44 // Return the current char. | 48 // Return the current char. |
| 45 int32 get() const { return char_; } | 49 int32_t get() const { return char_; } |
| 46 | 50 |
| 47 // Returns true if we're at the end of the string. | 51 // Returns true if we're at the end of the string. |
| 48 bool end() const { return array_pos_ == len_; } | 52 bool end() const { return array_pos_ == len_; } |
| 49 | 53 |
| 50 // Advance to the next actual character. Returns false if we're at the | 54 // Advance to the next actual character. Returns false if we're at the |
| 51 // end of the string. | 55 // end of the string. |
| 52 bool Advance(); | 56 bool Advance(); |
| 53 | 57 |
| 54 private: | 58 private: |
| 55 // The string we're iterating over. | 59 // The string we're iterating over. |
| 56 const uint8_t* str_; | 60 const uint8_t* str_; |
| 57 | 61 |
| 58 // The length of the encoded string. | 62 // The length of the encoded string. |
| 59 int32 len_; | 63 int32_t len_; |
| 60 | 64 |
| 61 // Array index. | 65 // Array index. |
| 62 int32 array_pos_; | 66 int32_t array_pos_; |
| 63 | 67 |
| 64 // The next array index. | 68 // The next array index. |
| 65 int32 next_pos_; | 69 int32_t next_pos_; |
| 66 | 70 |
| 67 // Character index. | 71 // Character index. |
| 68 int32 char_pos_; | 72 int32_t char_pos_; |
| 69 | 73 |
| 70 // The current character. | 74 // The current character. |
| 71 int32 char_; | 75 int32_t char_; |
| 72 | 76 |
| 73 DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator); | 77 DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator); |
| 74 }; | 78 }; |
| 75 | 79 |
| 76 class BASE_I18N_EXPORT UTF16CharIterator { | 80 class BASE_I18N_EXPORT UTF16CharIterator { |
| 77 public: | 81 public: |
| 78 // Requires |str| to live as long as the UTF16CharIterator does. | 82 // Requires |str| to live as long as the UTF16CharIterator does. |
| 79 explicit UTF16CharIterator(const string16* str); | 83 explicit UTF16CharIterator(const string16* str); |
| 80 UTF16CharIterator(const char16* str, size_t str_len); | 84 UTF16CharIterator(const char16* str, size_t str_len); |
| 81 ~UTF16CharIterator(); | 85 ~UTF16CharIterator(); |
| 82 | 86 |
| 83 // Return the starting array index of the current character within the | 87 // Return the starting array index of the current character within the |
| 84 // string. | 88 // string. |
| 85 int32 array_pos() const { return array_pos_; } | 89 int32_t array_pos() const { return array_pos_; } |
| 86 | 90 |
| 87 // Return the logical index of the current character, independent of the | 91 // Return the logical index of the current character, independent of the |
| 88 // number of codewords each character takes. | 92 // number of codewords each character takes. |
| 89 int32 char_pos() const { return char_pos_; } | 93 int32_t char_pos() const { return char_pos_; } |
| 90 | 94 |
| 91 // Return the current char. | 95 // Return the current char. |
| 92 int32 get() const { return char_; } | 96 int32_t get() const { return char_; } |
| 93 | 97 |
| 94 // Returns true if we're at the end of the string. | 98 // Returns true if we're at the end of the string. |
| 95 bool end() const { return array_pos_ == len_; } | 99 bool end() const { return array_pos_ == len_; } |
| 96 | 100 |
| 97 // Advance to the next actual character. Returns false if we're at the | 101 // Advance to the next actual character. Returns false if we're at the |
| 98 // end of the string. | 102 // end of the string. |
| 99 bool Advance(); | 103 bool Advance(); |
| 100 | 104 |
| 101 private: | 105 private: |
| 102 // Fills in the current character we found and advances to the next | 106 // Fills in the current character we found and advances to the next |
| 103 // character, updating all flags as necessary. | 107 // character, updating all flags as necessary. |
| 104 void ReadChar(); | 108 void ReadChar(); |
| 105 | 109 |
| 106 // The string we're iterating over. | 110 // The string we're iterating over. |
| 107 const char16* str_; | 111 const char16* str_; |
| 108 | 112 |
| 109 // The length of the encoded string. | 113 // The length of the encoded string. |
| 110 int32 len_; | 114 int32_t len_; |
| 111 | 115 |
| 112 // Array index. | 116 // Array index. |
| 113 int32 array_pos_; | 117 int32_t array_pos_; |
| 114 | 118 |
| 115 // The next array index. | 119 // The next array index. |
| 116 int32 next_pos_; | 120 int32_t next_pos_; |
| 117 | 121 |
| 118 // Character index. | 122 // Character index. |
| 119 int32 char_pos_; | 123 int32_t char_pos_; |
| 120 | 124 |
| 121 // The current character. | 125 // The current character. |
| 122 int32 char_; | 126 int32_t char_; |
| 123 | 127 |
| 124 DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator); | 128 DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator); |
| 125 }; | 129 }; |
| 126 | 130 |
| 127 } // namespace i18n | 131 } // namespace i18n |
| 128 } // namespace base | 132 } // namespace base |
| 129 | 133 |
| 130 #endif // BASE_I18N_CHAR_ITERATOR_H_ | 134 #endif // BASE_I18N_CHAR_ITERATOR_H_ |
| OLD | NEW |