| OLD | NEW |
| (Empty) |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // A streaming validator for UTF-8. Validation is based on the definition in | |
| 6 // RFC 3629. In particular, it does not reject the extra characters that | |
| 7 // base::IsStringUTF8() treats as invalid. | |
| 8 // | |
| 9 // The implementation detects errors on the first possible byte. | |
| 10 | |
| 11 #ifndef BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ | |
| 12 #define BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ | |
| 13 | |
| 14 #include <string> | |
| 15 | |
| 16 #include "base/basictypes.h" | |
| 17 #include "base/i18n/base_i18n_export.h" | |
| 18 | |
| 19 namespace base { | |
| 20 | |
| 21 class BASE_I18N_EXPORT StreamingUtf8Validator { | |
| 22 public: | |
| 23 // States reported by the validator. It starts in VALID_ENDPOINT and, while | |
| 24 // the input remains valid, moves between VALID_ENDPOINT and VALID_MIDPOINT. | |
| 25 // Once an invalid byte or UTF-8 sequence is seen the state becomes INVALID | |
| 26 // and stays INVALID until Reset() is called. | |
| 27 enum State { | |
| 28 VALID_ENDPOINT, | |
| 29 VALID_MIDPOINT, | |
| 30 INVALID | |
| 31 }; | |
| 32 | |
| 33 StreamingUtf8Validator() : state_(0u) {} | |
| 34 // No destructor is declared: the trivial one is intentionally omitted. | |
| 35 | |
| 36 // Validates |size| bytes starting at |data|. The result describes all bytes | |
| 37 // passed to AddBytes() since this object was constructed or last reset: | |
| 38 // VALID_ENDPOINT if their concatenation is a valid UTF-8 string, | |
| 39 // VALID_MIDPOINT if it could be the prefix of a valid UTF-8 string, and | |
| 40 // INVALID if an invalid byte or UTF-8 sequence was present. | |
| 41 State AddBytes(const char* data, size_t size); | |
| 42 | |
| 43 // Returns the object to a freshly-constructed state so that it can be re-used. | |
| 44 void Reset(); | |
| 45 | |
| 46 // Validates a complete string using the same criteria as AddBytes(). Returns | |
| 47 // true if |string| only contains complete, valid UTF-8 codepoints. | |
| 48 static bool Validate(const std::string& string); | |
| 49 | |
| 50 private: | |
| 51 // The current state of the validator. Value 0 is the initial/valid state, | |
| 52 // which is what the constructor sets. The state is stored as an offset into | |
| 53 // |kUtf8ValidatorTables|. The special state |kUtf8InvalidState| is invalid. | |
| 54 uint8 state_; | |
| 55 | |
| 56 // Copying is disallowed. This type could be made copyable but there is | |
| 57 // currently no use-case for it. | |
| 58 DISALLOW_COPY_AND_ASSIGN(StreamingUtf8Validator); | |
| 59 }; | |
| 60 | |
| 61 } // namespace base | |
| 62 | |
| 63 #endif // BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ | |
| OLD | NEW |