OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // This implementation doesn't use ICU. The ICU macros are oriented towards | 5 // This implementation doesn't use ICU. The ICU macros are oriented towards |
6 // character-at-a-time processing, whereas byte-at-a-time processing is easier | 6 // character-at-a-time processing, whereas byte-at-a-time processing is easier |
7 // with streaming input. | 7 // with streaming input. |
8 | 8 |
9 #include "base/i18n/streaming_utf8_validator.h" | 9 #include "base/i18n/streaming_utf8_validator.h" |
10 | 10 |
11 #include "base/i18n/utf8_validator_tables.h" | 11 #include "base/i18n/utf8_validator_tables.h" |
12 #include "base/logging.h" | 12 #include "base/logging.h" |
13 | 13 |
14 namespace base { | 14 namespace base { |
15 namespace { | 15 namespace { |
16 | 16 |
17 uint8 StateTableLookup(uint8 offset) { | 17 uint8_t StateTableLookup(uint8_t offset) { |
18 DCHECK_LT(offset, internal::kUtf8ValidatorTablesSize); | 18 DCHECK_LT(offset, internal::kUtf8ValidatorTablesSize); |
19 return internal::kUtf8ValidatorTables[offset]; | 19 return internal::kUtf8ValidatorTables[offset]; |
20 } | 20 } |
21 | 21 |
22 } // namespace | 22 } // namespace |
23 | 23 |
24 StreamingUtf8Validator::State StreamingUtf8Validator::AddBytes(const char* data, | 24 StreamingUtf8Validator::State StreamingUtf8Validator::AddBytes(const char* data, |
25 size_t size) { | 25 size_t size) { |
26 // Copy |state_| into a local variable so that the compiler doesn't have to be | 26 // Copy |state_| into a local variable so that the compiler doesn't have to be |
27 // careful of aliasing. | 27 // careful of aliasing. |
28 uint8 state = state_; | 28 uint8_t state = state_; |
29 for (const char* p = data; p != data + size; ++p) { | 29 for (const char* p = data; p != data + size; ++p) { |
30 if ((*p & 0x80) == 0) { | 30 if ((*p & 0x80) == 0) { |
31 if (state == 0) | 31 if (state == 0) |
32 continue; | 32 continue; |
33 state = internal::I18N_UTF8_VALIDATOR_INVALID_INDEX; | 33 state = internal::I18N_UTF8_VALIDATOR_INVALID_INDEX; |
34 break; | 34 break; |
35 } | 35 } |
36 const uint8 shift_amount = StateTableLookup(state); | 36 const uint8_t shift_amount = StateTableLookup(state); |
37 const uint8 shifted_char = (*p & 0x7F) >> shift_amount; | 37 const uint8_t shifted_char = (*p & 0x7F) >> shift_amount; |
38 state = StateTableLookup(state + shifted_char + 1); | 38 state = StateTableLookup(state + shifted_char + 1); |
39 // State may be INVALID here, but this code is optimised for the case of | 39 // State may be INVALID here, but this code is optimised for the case of |
40 // valid UTF-8 and it is more efficient (by about 2%) to not attempt an | 40 // valid UTF-8 and it is more efficient (by about 2%) to not attempt an |
41 // early loop exit unless we hit an ASCII character. | 41 // early loop exit unless we hit an ASCII character. |
42 } | 42 } |
43 state_ = state; | 43 state_ = state; |
44 return state == 0 ? VALID_ENDPOINT | 44 return state == 0 ? VALID_ENDPOINT |
45 : state == internal::I18N_UTF8_VALIDATOR_INVALID_INDEX | 45 : state == internal::I18N_UTF8_VALIDATOR_INVALID_INDEX |
46 ? INVALID | 46 ? INVALID |
47 : VALID_MIDPOINT; | 47 : VALID_MIDPOINT; |
48 } | 48 } |
49 | 49 |
50 void StreamingUtf8Validator::Reset() { | 50 void StreamingUtf8Validator::Reset() { |
51 state_ = 0u; | 51 state_ = 0u; |
52 } | 52 } |
53 | 53 |
54 bool StreamingUtf8Validator::Validate(const std::string& string) { | 54 bool StreamingUtf8Validator::Validate(const std::string& string) { |
55 return StreamingUtf8Validator().AddBytes(string.data(), string.size()) == | 55 return StreamingUtf8Validator().AddBytes(string.data(), string.size()) == |
56 VALID_ENDPOINT; | 56 VALID_ENDPOINT; |
57 } | 57 } |
58 | 58 |
59 } // namespace base | 59 } // namespace base |
OLD | NEW |