Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(27)

Side by Side Diff: base/i18n/streaming_utf8_validator.h

Issue 1647803004: Move base to DEPS (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/i18n/rtl_unittest.cc ('k') | base/i18n/streaming_utf8_validator.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // A streaming validator for UTF-8. Validation is based on the definition in
6 // RFC 3629. In particular, it accepts characters that are valid under RFC 3629
7 // even though base::IsStringUTF8() rejects them.
8 //
9 // The implementation detects errors on the first possible byte.
10
11 #ifndef BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
12 #define BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
13
14 #include <string>
15
16 #include "base/basictypes.h"
17 #include "base/i18n/base_i18n_export.h"
18
19 namespace base {
20
21 class BASE_I18N_EXPORT StreamingUtf8Validator {
22 public:
23 // The validator exposes 3 states. It starts in state VALID_ENDPOINT. As it
24 // processes bytes it alternates between VALID_ENDPOINT and
25 // VALID_MIDPOINT. If it encounters an invalid byte or UTF-8 sequence the
26 // state changes to INVALID and stays there until Reset() is called.
27 enum State {
28 VALID_ENDPOINT,  // The bytes seen so far form a valid UTF-8 string.
29 VALID_MIDPOINT,  // The bytes seen so far could be the prefix of a valid UTF-8 string.
30 INVALID  // An invalid byte or UTF-8 sequence has been seen.
31 };
32
33 StreamingUtf8Validator() : state_(0u) {}  // 0 is the initial (VALID_ENDPOINT) state.
34 // Trivial destructor intentionally omitted.
35
36 // Validates |size| bytes starting at |data|. The result covers every byte
37 // passed to AddBytes() since this object was constructed or last reset:
38 // VALID_ENDPOINT if their concatenation is a valid UTF-8 string,
39 // VALID_MIDPOINT if it could be the prefix of a valid UTF-8 string, and
40 // INVALID if an invalid byte or UTF-8 sequence was present.
41 State AddBytes(const char* data, size_t size);
42
43 // Returns the object to a freshly-constructed state so that it can be re-used.
44 void Reset();
45
46 // Validates a complete string using the same criteria. Returns true only if
47 // |string| consists entirely of complete, valid UTF-8 sequences.
48 static bool Validate(const std::string& string);
49
50 private:
51 // The validator's internal state (not a State enum value). 0 is the initial,
52 // valid state. It is stored as an offset into |kUtf8ValidatorTables|; the
53 // special value |kUtf8InvalidState| means invalid input has been seen.
54 uint8 state_;
55
56 // This type could be made copyable but there is currently no use-case for
57 // it.
58 DISALLOW_COPY_AND_ASSIGN(StreamingUtf8Validator);
59 };
60
61 } // namespace base
62
63 #endif // BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
OLDNEW
« no previous file with comments | « base/i18n/rtl_unittest.cc ('k') | base/i18n/streaming_utf8_validator.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698