OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/i18n/streaming_utf8_validator.h" | 5 #include "base/i18n/streaming_utf8_validator.h" |
6 | 6 |
| 7 #include <stddef.h> |
| 8 #include <stdint.h> |
7 #include <stdio.h> | 9 #include <stdio.h> |
8 #include <string.h> | 10 #include <string.h> |
9 | 11 |
10 #include <string> | 12 #include <string> |
11 | 13 |
| 14 #include "base/macros.h" |
12 #include "base/strings/string_piece.h" | 15 #include "base/strings/string_piece.h" |
13 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
14 | 17 |
15 // Define BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST to verify that this class | 18 // Define BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST to verify that this class |
16 // accepts exactly the same set of 4-byte strings as ICU-based validation. This | 19 // accepts exactly the same set of 4-byte strings as ICU-based validation. This |
17 // tests every possible 4-byte string, so it is too slow to run routinely on | 20 // tests every possible 4-byte string, so it is too slow to run routinely on |
18 // low-powered machines. | 21 // low-powered machines. |
19 // | 22 // |
20 // #define BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST | 23 // #define BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST |
21 | 24 |
22 #ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST | 25 #ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST |
23 | 26 |
24 #include "base/basictypes.h" | |
25 #include "base/bind.h" | 27 #include "base/bind.h" |
26 #include "base/location.h" | 28 #include "base/location.h" |
27 #include "base/logging.h" | 29 #include "base/logging.h" |
28 #include "base/memory/ref_counted.h" | 30 #include "base/memory/ref_counted.h" |
29 #include "base/strings/string_util.h" | 31 #include "base/strings/string_util.h" |
30 #include "base/strings/stringprintf.h" | 32 #include "base/strings/stringprintf.h" |
31 #include "base/strings/utf_string_conversion_utils.h" | 33 #include "base/strings/utf_string_conversion_utils.h" |
32 #include "base/synchronization/condition_variable.h" | 34 #include "base/synchronization/condition_variable.h" |
33 #include "base/synchronization/lock.h" | 35 #include "base/synchronization/lock.h" |
34 #include "base/threading/sequenced_worker_pool.h" | 36 #include "base/threading/sequenced_worker_pool.h" |
35 #include "third_party/icu/source/common/unicode/utf8.h" | 37 #include "third_party/icu/source/common/unicode/utf8.h" |
36 | 38 |
37 #endif // BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST | 39 #endif // BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST |
38 | 40 |
39 namespace base { | 41 namespace base { |
40 namespace { | 42 namespace { |
41 | 43 |
42 // Avoid having to qualify the enum values in the tests. | 44 // Avoid having to qualify the enum values in the tests. |
43 const StreamingUtf8Validator::State VALID_ENDPOINT = | 45 const StreamingUtf8Validator::State VALID_ENDPOINT = |
44 StreamingUtf8Validator::VALID_ENDPOINT; | 46 StreamingUtf8Validator::VALID_ENDPOINT; |
45 const StreamingUtf8Validator::State VALID_MIDPOINT = | 47 const StreamingUtf8Validator::State VALID_MIDPOINT = |
46 StreamingUtf8Validator::VALID_MIDPOINT; | 48 StreamingUtf8Validator::VALID_MIDPOINT; |
47 const StreamingUtf8Validator::State INVALID = StreamingUtf8Validator::INVALID; | 49 const StreamingUtf8Validator::State INVALID = StreamingUtf8Validator::INVALID; |
48 | 50 |
49 #ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST | 51 #ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST |
50 | 52 |
// Number of consecutive 32-bit values handed to each worker task by the
// thorough test (2^24 values per task).
const uint32_t kThoroughTestChunkSize = 1 << 24;
52 | 54 |
53 class StreamingUtf8ValidatorThoroughTest : public ::testing::Test { | 55 class StreamingUtf8ValidatorThoroughTest : public ::testing::Test { |
54 protected: | 56 protected: |
55 StreamingUtf8ValidatorThoroughTest() | 57 StreamingUtf8ValidatorThoroughTest() |
56 : all_done_(&lock_), tasks_dispatched_(0), tasks_finished_(0) {} | 58 : all_done_(&lock_), tasks_dispatched_(0), tasks_finished_(0) {} |
57 | 59 |
58 // This uses the same logic as base::IsStringUTF8 except it considers | 60 // This uses the same logic as base::IsStringUTF8 except it considers |
59 // non-characters valid (and doesn't require a string as input). | 61 // non-characters valid (and doesn't require a string as input). |
60 static bool IsStringUtf8(const char* src, int32 src_len) { | 62 static bool IsStringUtf8(const char* src, int32_t src_len) { |
61 int32 char_index = 0; | 63 int32_t char_index = 0; |
62 | 64 |
63 while (char_index < src_len) { | 65 while (char_index < src_len) { |
64 int32 code_point; | 66 int32_t code_point; |
65 U8_NEXT(src, char_index, src_len, code_point); | 67 U8_NEXT(src, char_index, src_len, code_point); |
66 if (!base::IsValidCodepoint(code_point)) | 68 if (!base::IsValidCodepoint(code_point)) |
67 return false; | 69 return false; |
68 } | 70 } |
69 return true; | 71 return true; |
70 } | 72 } |
71 | 73 |
72 // Converts the passed-in integer to a 4 byte string and then | 74 // Converts the passed-in integer to a 4 byte string and then |
73 // verifies that IsStringUtf8 and StreamingUtf8Validator agree on | 75 // verifies that IsStringUtf8 and StreamingUtf8Validator agree on |
74 // whether it is valid UTF-8 or not. | 76 // whether it is valid UTF-8 or not. |
75 void TestNumber(uint32 n) const { | 77 void TestNumber(uint32_t n) const { |
76 char test[sizeof n]; | 78 char test[sizeof n]; |
77 memcpy(test, &n, sizeof n); | 79 memcpy(test, &n, sizeof n); |
78 StreamingUtf8Validator validator; | 80 StreamingUtf8Validator validator; |
79 EXPECT_EQ(IsStringUtf8(test, sizeof n), | 81 EXPECT_EQ(IsStringUtf8(test, sizeof n), |
80 validator.AddBytes(test, sizeof n) == VALID_ENDPOINT) | 82 validator.AddBytes(test, sizeof n) == VALID_ENDPOINT) |
81 << "Difference of opinion for \"" | 83 << "Difference of opinion for \"" |
82 << base::StringPrintf("\\x%02X\\x%02X\\x%02X\\x%02X", | 84 << base::StringPrintf("\\x%02X\\x%02X\\x%02X\\x%02X", |
83 test[0] & 0xFF, | 85 test[0] & 0xFF, |
84 test[1] & 0xFF, | 86 test[1] & 0xFF, |
85 test[2] & 0xFF, | 87 test[2] & 0xFF, |
86 test[3] & 0xFF) << "\""; | 88 test[3] & 0xFF) << "\""; |
87 } | 89 } |
88 | 90 |
89 public: | 91 public: |
90 // Tests the 4-byte sequences corresponding to the |size| integers | 92 // Tests the 4-byte sequences corresponding to the |size| integers |
91 // starting at |begin|. This is intended to be run from a worker | 93 // starting at |begin|. This is intended to be run from a worker |
92 // pool. Signals |all_done_| at the end if it thinks all tasks are | 94 // pool. Signals |all_done_| at the end if it thinks all tasks are |
93 // finished. | 95 // finished. |
94 void TestRange(uint32 begin, uint32 size) { | 96 void TestRange(uint32_t begin, uint32_t size) { |
95 for (uint32 i = 0; i < size; ++i) { | 97 for (uint32_t i = 0; i < size; ++i) { |
96 TestNumber(begin + i); | 98 TestNumber(begin + i); |
97 } | 99 } |
98 base::AutoLock al(lock_); | 100 base::AutoLock al(lock_); |
99 ++tasks_finished_; | 101 ++tasks_finished_; |
100 LOG(INFO) << tasks_finished_ << " / " << tasks_dispatched_ | 102 LOG(INFO) << tasks_finished_ << " / " << tasks_dispatched_ |
101 << " tasks done\n"; | 103 << " tasks done\n"; |
102 if (tasks_finished_ >= tasks_dispatched_) { | 104 if (tasks_finished_ >= tasks_dispatched_) { |
103 all_done_.Signal(); | 105 all_done_.Signal(); |
104 } | 106 } |
105 } | 107 } |
106 | 108 |
107 protected: | 109 protected: |
108 base::Lock lock_; | 110 base::Lock lock_; |
109 base::ConditionVariable all_done_; | 111 base::ConditionVariable all_done_; |
110 int tasks_dispatched_; | 112 int tasks_dispatched_; |
111 int tasks_finished_; | 113 int tasks_finished_; |
112 }; | 114 }; |
113 | 115 |
114 TEST_F(StreamingUtf8ValidatorThoroughTest, TestEverything) { | 116 TEST_F(StreamingUtf8ValidatorThoroughTest, TestEverything) { |
115 scoped_refptr<base::SequencedWorkerPool> pool = | 117 scoped_refptr<base::SequencedWorkerPool> pool = |
116 new base::SequencedWorkerPool(32, "TestEverything"); | 118 new base::SequencedWorkerPool(32, "TestEverything"); |
117 base::AutoLock al(lock_); | 119 base::AutoLock al(lock_); |
118 uint32 begin = 0; | 120 uint32_t begin = 0; |
119 do { | 121 do { |
120 pool->PostWorkerTask( | 122 pool->PostWorkerTask( |
121 FROM_HERE, | 123 FROM_HERE, |
122 base::Bind(&StreamingUtf8ValidatorThoroughTest::TestRange, | 124 base::Bind(&StreamingUtf8ValidatorThoroughTest::TestRange, |
123 base::Unretained(this), | 125 base::Unretained(this), |
124 begin, | 126 begin, |
125 kThoroughTestChunkSize)); | 127 kThoroughTestChunkSize)); |
126 ++tasks_dispatched_; | 128 ++tasks_dispatched_; |
127 begin += kThoroughTestChunkSize; | 129 begin += kThoroughTestChunkSize; |
128 } while (begin != 0); | 130 } while (begin != 0); |
(...skipping 274 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
403 TEST(StreamingUtf8ValidatorValidateTest, SimpleInvalidCase) { | 405 TEST(StreamingUtf8ValidatorValidateTest, SimpleInvalidCase) { |
404 EXPECT_FALSE(StreamingUtf8Validator::Validate("\xc0\x80")); | 406 EXPECT_FALSE(StreamingUtf8Validator::Validate("\xc0\x80")); |
405 } | 407 } |
406 | 408 |
407 TEST(StreamingUtf8ValidatorValidateTest, TruncatedIsInvalid) { | 409 TEST(StreamingUtf8ValidatorValidateTest, TruncatedIsInvalid) { |
408 EXPECT_FALSE(StreamingUtf8Validator::Validate("\xc2")); | 410 EXPECT_FALSE(StreamingUtf8Validator::Validate("\xc2")); |
409 } | 411 } |
410 | 412 |
411 } // namespace | 413 } // namespace |
412 } // namespace base | 414 } // namespace base |
OLD | NEW |