OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // A streaming validator for UTF-8. Validation is based on the definition in | 5 // A streaming validator for UTF-8. Validation is based on the definition in |
6 // RFC-3629. In particular, it does not reject the invalid characters rejected | 6 // RFC-3629. In particular, it does not reject the invalid characters rejected |
7 // by base::IsStringUTF8(). | 7 // by base::IsStringUTF8(). |
8 // | 8 // |
9 // The implementation detects errors on the first possible byte. | 9 // The implementation detects errors on the first possible byte. |
10 | 10 |
11 #ifndef BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ | 11 #ifndef BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ |
12 #define BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ | 12 #define BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ |
13 | 13 |
| 14 #include <stddef.h> |
| 15 #include <stdint.h> |
| 16 |
14 #include <string> | 17 #include <string> |
15 | 18 |
16 #include "base/basictypes.h" | |
17 #include "base/i18n/base_i18n_export.h" | 19 #include "base/i18n/base_i18n_export.h" |
| 20 #include "base/macros.h" |
18 | 21 |
19 namespace base { | 22 namespace base { |
20 | 23 |
21 class BASE_I18N_EXPORT StreamingUtf8Validator { | 24 class BASE_I18N_EXPORT StreamingUtf8Validator { |
22 public: | 25 public: |
23 // The validator exposes 3 states. It starts in state VALID_ENDPOINT. As it | 26 // The validator exposes 3 states. It starts in state VALID_ENDPOINT. As it |
24 // processes characters it alternates between VALID_ENDPOINT and | 27 // processes characters it alternates between VALID_ENDPOINT and |
25 // VALID_MIDPOINT. If it encounters an invalid byte or UTF-8 sequence the | 28 // VALID_MIDPOINT. If it encounters an invalid byte or UTF-8 sequence the |
26 // state changes permanently to INVALID. | 29 // state changes permanently to INVALID. |
27 enum State { | 30 enum State { |
(...skipping 16 matching lines...) |
44 void Reset(); | 47 void Reset(); |
45 | 48 |
46 // Validate a complete string using the same criteria. Returns true if the | 49 // Validate a complete string using the same criteria. Returns true if the |
47 // string only contains complete, valid UTF-8 codepoints. | 50 // string only contains complete, valid UTF-8 codepoints. |
48 static bool Validate(const std::string& string); | 51 static bool Validate(const std::string& string); |
49 | 52 |
50 private: | 53 private: |
51 // The current state of the validator. Value 0 is the initial/valid state. | 54 // The current state of the validator. Value 0 is the initial/valid state. |
52 // The state is stored as an offset into |kUtf8ValidatorTables|. The special | 55 // The state is stored as an offset into |kUtf8ValidatorTables|. The special |
53 // state |kUtf8InvalidState| is invalid. | 56 // state |kUtf8InvalidState| is invalid. |
54 uint8 state_; | 57 uint8_t state_; |
55 | 58 |
56 // This type could be made copyable but there is currently no use-case for | 59 // This type could be made copyable but there is currently no use-case for |
57 // it. | 60 // it. |
58 DISALLOW_COPY_AND_ASSIGN(StreamingUtf8Validator); | 61 DISALLOW_COPY_AND_ASSIGN(StreamingUtf8Validator); |
59 }; | 62 }; |
60 | 63 |
61 } // namespace base | 64 } // namespace base |
62 | 65 |
63 #endif // BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ | 66 #endif // BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ |
OLD | NEW |