OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef BASE_UTF_STRING_CONVERSION_UTILS_H_ | 5 #ifndef BASE_UTF_STRING_CONVERSION_UTILS_H_ |
6 #define BASE_UTF_STRING_CONVERSION_UTILS_H_ | 6 #define BASE_UTF_STRING_CONVERSION_UTILS_H_ |
7 | 7 |
8 // This should only be used by the various UTF string conversion files. | 8 // This should only be used by the various UTF string conversion files. |
9 | 9 |
10 #include "base/string16.h" | 10 #include "base/string16.h" |
11 | 11 |
12 namespace base { | 12 namespace base { |
13 | 13 |
14 inline bool IsValidCodepoint(uint32 code_point) { | 14 inline bool IsValidCodepoint(uint32 code_point) { |
15 // Excludes the surrogate code points ([0xD800, 0xDFFF]) and | 15 // Excludes the surrogate code points ([0xD800, 0xDFFF]) and |
16 // codepoints larger than 0x10FFFF (the highest codepoint allowed). | 16 // codepoints larger than 0x10FFFF (the highest codepoint allowed). |
17 // Non-characters and unassigned codepoints are allowed. | 17 // Non-characters and unassigned codepoints are allowed. |
18 return code_point < 0xD800u || | 18 return code_point < 0xD800u || |
19 (code_point >= 0xE000u && code_point <= 0x10FFFFu); | 19 (code_point >= 0xE000u && code_point <= 0x10FFFFu); |
20 } | 20 } |
21 | 21 |
| 22 inline bool IsValidCharacter(uint32 code_point) { |
| 23 // Excludes non-characters (U+FDD0..U+FDEF, and all codepoints ending in |
| 24 // 0xFFFE or 0xFFFF) from the set of valid code points. |
| 25 return code_point < 0xD800u || (code_point >= 0xE000u && |
| 26 code_point < 0xFDD0u) || (code_point > 0xFDEFu && |
| 27 code_point <= 0x10FFFFu && (code_point & 0xFFFEu) != 0xFFFEu); |
| 28 } |
| 29 |
22 // ReadUnicodeCharacter -------------------------------------------------------- | 30 // ReadUnicodeCharacter -------------------------------------------------------- |
23 | 31 |
24 // Reads a UTF-8 stream, placing the next code point into the given output | 32 // Reads a UTF-8 stream, placing the next code point into the given output |
25 // |*code_point|. |src| represents the entire string to read, and |*char_index| | 33 // |*code_point|. |src| represents the entire string to read, and |*char_index| |
26 // is the character offset within the string to start reading at. |*char_index| | 34 // is the character offset within the string to start reading at. |*char_index| |
27 // will be updated to index the last character read, such that incrementing it | 35 // will be updated to index the last character read, such that incrementing it |
28 // (as in a for loop) will take the reader to the next character. | 36 // (as in a for loop) will take the reader to the next character. |
29 // | 37 // |
30 // Returns true on success. On false, |*code_point| will be invalid. | 38 // Returns true on success. On false, |*code_point| will be invalid. |
31 bool ReadUnicodeCharacter(const char* src, | 39 bool ReadUnicodeCharacter(const char* src, |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
77 void PrepareForUTF8Output(const CHAR* src, size_t src_len, std::string* output); | 85 void PrepareForUTF8Output(const CHAR* src, size_t src_len, std::string* output); |
78 | 86 |
79 // Prepares an output buffer (containing either UTF-16 or -32 data) given some | 87 // Prepares an output buffer (containing either UTF-16 or -32 data) given some |
80 // UTF-8 input that will be converted to it. See PrepareForUTF8Output(). | 88 // UTF-8 input that will be converted to it. See PrepareForUTF8Output(). |
81 template<typename STRING> | 89 template<typename STRING> |
82 void PrepareForUTF16Or32Output(const char* src, size_t src_len, STRING* output); | 90 void PrepareForUTF16Or32Output(const char* src, size_t src_len, STRING* output); |
83 | 91 |
84 } // namespace base | 92 } // namespace base |
85 | 93 |
86 #endif // BASE_UTF_STRING_CONVERSION_UTILS_H_ | 94 #endif // BASE_UTF_STRING_CONVERSION_UTILS_H_ |
OLD | NEW |