| OLD | NEW |
| (Empty) |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // All data that is passed through a WebSocket with type "Text" needs to be | |
| 6 // validated as UTF8. Since this is done on the IO thread, it needs to be | |
| 7 // reasonably fast. | |
| 8 | |
| 9 // We are only interested in the performance on valid UTF8. Invalid UTF8 will | |
| 10 // result in a connection failure, so is unlikely to become a source of | |
| 11 // performance issues. | |
| 12 | |
| 13 #include "base/i18n/streaming_utf8_validator.h" | |
| 14 | |
| 15 #include <string> | |
| 16 | |
| 17 #include "base/basictypes.h" | |
| 18 #include "base/bind.h" | |
| 19 #include "base/callback.h" | |
| 20 #include "base/strings/string_util.h" | |
| 21 #include "base/strings/stringprintf.h" | |
| 22 #include "base/test/perf_time_logger.h" | |
| 23 #include "testing/gtest/include/gtest/gtest.h" | |
| 24 | |
| 25 namespace base { | |
| 26 namespace { | |
| 27 | |
// Inclusive end points of the ranges of valid UTF-8 sequences exercised by the
// tests, one range per encoded length. Each range is meant to be wide enough
// to give the validator meaningful work while still looking like "realistic"
// text (for example, control characters are left out).

// One-byte sequences.
const char kOneByteSeqRangeStart[] = " ";  // U+0020
const char kOneByteSeqRangeEnd[] = "~";    // U+007E

// Two-byte sequences.
const char kTwoByteSeqRangeStart[] = "\xc2\xa0";  // U+00A0 non-breaking space
const char kTwoByteSeqRangeEnd[] = "\xc9\x8f";    // U+024F small y with stroke

// Three-byte sequences.
const char kThreeByteSeqRangeStart[] = "\xe3\x81\x82";  // U+3042 Hiragana "a"
const char kThreeByteSeqRangeEnd[] = "\xe9\xbf\x83";    // U+9FC3 "to blink"

// Four-byte sequences.
const char kFourByteSeqRangeStart[] = "\xf0\xa0\x80\x8b";  // U+2000B
const char kFourByteSeqRangeEnd[] = "\xf0\xaa\x9a\xb2";    // U+2A6B2

// Requested sizes, in bytes, of the strings to validate: 1, 32, 256, 32768
// and 1048576.
const size_t kTestLengths[] = {1, 1 << 5, 1 << 8, 1 << 15, 1 << 20};
| 46 | |
// Baseline for comparison: the simplest possible byte-at-a-time check.
// Returns true when no byte of |s| has its top bit set (an empty string
// therefore passes). It is only run against 1-byte UTF-8 sequences, because
// its result is not meaningful for input containing top-bit-set bytes.
bool IsString7Bit(const std::string& s) {
  const char* cursor = s.data();
  const char* const limit = cursor + s.length();
  for (; cursor != limit; ++cursor) {
    // Any byte with the high bit set is outside the 7-bit range.
    if ((*cursor & 0x80) != 0)
      return false;
  }
  return true;
}
| 58 | |
| 59 // Assumes that |previous| is a valid UTF-8 sequence, and attempts to return | |
| 60 // the next one. Is just barely smart enough to iterate through the ranges | |
| 61 // defined about. | |
| 62 std::string NextUtf8Sequence(const std::string& previous) { | |
| 63 DCHECK(StreamingUtf8Validator::Validate(previous)); | |
| 64 std::string next = previous; | |
| 65 for (int i = static_cast<int>(previous.length() - 1); i >= 0; --i) { | |
| 66 // All bytes in a UTF-8 sequence except the first one are | |
| 67 // constrained to the range 0x80 to 0xbf, inclusive. When we | |
| 68 // increment past 0xbf, we carry into the previous byte. | |
| 69 if (i > 0 && next[i] == '\xbf') { | |
| 70 next[i] = '\x80'; | |
| 71 continue; // carry | |
| 72 } | |
| 73 ++next[i]; | |
| 74 break; // no carry | |
| 75 } | |
| 76 DCHECK(StreamingUtf8Validator::Validate(next)) | |
| 77 << "Result \"" << next << "\" failed validation"; | |
| 78 return next; | |
| 79 } | |
| 80 | |
| 81 typedef bool (*TestTargetType)(const std::string&); | |
| 82 | |
| 83 // Run fuction |target| over |test_string| |times| times, and report the results | |
| 84 // using |description|. | |
| 85 bool RunTest(const std::string& description, | |
| 86 TestTargetType target, | |
| 87 const std::string& test_string, | |
| 88 int times) { | |
| 89 base::PerfTimeLogger timer(description.c_str()); | |
| 90 bool result = true; | |
| 91 for (int i = 0; i < times; ++i) { | |
| 92 result = target(test_string) && result; | |
| 93 } | |
| 94 timer.Done(); | |
| 95 return result; | |
| 96 } | |
| 97 | |
// Builds a string by concatenating whole copies of |input| until its size
// equals or exceeds |length| bytes. The result always holds at least one copy
// of |input|, so it can be longer than |length| (for example when |length| is
// zero or is not a multiple of the input size). An empty |input| produces an
// empty string regardless of |length|.
std::string ConstructRepeatedTestString(const std::string& input,
                                        size_t length) {
  // Repeating an empty string can never reach |length|; without this guard
  // the doubling loop below would never terminate.
  if (input.empty())
    return std::string();

  std::string output = input;
  // Double the string for as long as doing so stays below |length|, so that
  // only a logarithmic number of appends is needed.
  while (output.length() * 2 < length) {
    output += output;
  }
  // Top up whatever is still missing with a smaller repeated string.
  if (output.length() < length) {
    output += ConstructRepeatedTestString(input, length - output.length());
  }
  return output;
}
| 111 | |
| 112 // Construct a string by expanding the range of UTF-8 sequences | |
| 113 // between |input_start| and |input_end|, inclusive, and then | |
| 114 // repeating the resulting string until it equals or exceeds |length| | |
| 115 // bytes. |input_start| and |input_end| must be valid UTF-8 | |
| 116 // sequences. | |
| 117 std::string ConstructRangedTestString(const std::string& input_start, | |
| 118 const std::string& input_end, | |
| 119 size_t length) { | |
| 120 std::string output = input_start; | |
| 121 std::string input = input_start; | |
| 122 while (output.length() < length && input != input_end) { | |
| 123 input = NextUtf8Sequence(input); | |
| 124 output += input; | |
| 125 } | |
| 126 if (output.length() < length) { | |
| 127 output = ConstructRepeatedTestString(output, length); | |
| 128 } | |
| 129 return output; | |
| 130 } | |
| 131 | |
| 132 struct TestFunctionDescription { | |
| 133 TestTargetType function; | |
| 134 const char* function_name; | |
| 135 }; | |
| 136 | |
| 137 bool IsStringUTF8(const std::string& str) { | |
| 138 return base::IsStringUTF8(base::StringPiece(str)); | |
| 139 } | |
| 140 | |
| 141 // IsString7Bit is intentionally placed last so it can be excluded easily. | |
| 142 const TestFunctionDescription kTestFunctions[] = { | |
| 143 {&StreamingUtf8Validator::Validate, "StreamingUtf8Validator"}, | |
| 144 {&IsStringUTF8, "IsStringUTF8"}, {&IsString7Bit, "IsString7Bit"}}; | |
| 145 | |
| 146 // Construct a test string from |construct_test_string| for each of the lengths | |
| 147 // in |kTestLengths| in turn. For each string, run each test in |test_functions| | |
| 148 // for a number of iterations such that the total number of bytes validated | |
| 149 // is around 16MB. | |
| 150 void RunSomeTests( | |
| 151 const char format[], | |
| 152 base::Callback<std::string(size_t length)> construct_test_string, | |
| 153 const TestFunctionDescription* test_functions, | |
| 154 size_t test_count) { | |
| 155 for (size_t i = 0; i < arraysize(kTestLengths); ++i) { | |
| 156 const size_t length = kTestLengths[i]; | |
| 157 const std::string test_string = construct_test_string.Run(length); | |
| 158 const int real_length = static_cast<int>(test_string.length()); | |
| 159 const int times = (1 << 24) / real_length; | |
| 160 for (size_t test_index = 0; test_index < test_count; ++test_index) { | |
| 161 EXPECT_TRUE(RunTest(StringPrintf(format, | |
| 162 test_functions[test_index].function_name, | |
| 163 real_length, | |
| 164 times), | |
| 165 test_functions[test_index].function, | |
| 166 test_string, | |
| 167 times)); | |
| 168 } | |
| 169 } | |
| 170 } | |
| 171 | |
| 172 TEST(StreamingUtf8ValidatorPerfTest, OneByteRepeated) { | |
| 173 RunSomeTests("%s: bytes=1 repeated length=%d repeat=%d", | |
| 174 base::Bind(ConstructRepeatedTestString, kOneByteSeqRangeStart), | |
| 175 kTestFunctions, | |
| 176 3); | |
| 177 } | |
| 178 | |
| 179 TEST(StreamingUtf8ValidatorPerfTest, OneByteRange) { | |
| 180 RunSomeTests("%s: bytes=1 ranged length=%d repeat=%d", | |
| 181 base::Bind(ConstructRangedTestString, | |
| 182 kOneByteSeqRangeStart, | |
| 183 kOneByteSeqRangeEnd), | |
| 184 kTestFunctions, | |
| 185 3); | |
| 186 } | |
| 187 | |
| 188 TEST(StreamingUtf8ValidatorPerfTest, TwoByteRepeated) { | |
| 189 RunSomeTests("%s: bytes=2 repeated length=%d repeat=%d", | |
| 190 base::Bind(ConstructRepeatedTestString, kTwoByteSeqRangeStart), | |
| 191 kTestFunctions, | |
| 192 2); | |
| 193 } | |
| 194 | |
| 195 TEST(StreamingUtf8ValidatorPerfTest, TwoByteRange) { | |
| 196 RunSomeTests("%s: bytes=2 ranged length=%d repeat=%d", | |
| 197 base::Bind(ConstructRangedTestString, | |
| 198 kTwoByteSeqRangeStart, | |
| 199 kTwoByteSeqRangeEnd), | |
| 200 kTestFunctions, | |
| 201 2); | |
| 202 } | |
| 203 | |
| 204 TEST(StreamingUtf8ValidatorPerfTest, ThreeByteRepeated) { | |
| 205 RunSomeTests( | |
| 206 "%s: bytes=3 repeated length=%d repeat=%d", | |
| 207 base::Bind(ConstructRepeatedTestString, kThreeByteSeqRangeStart), | |
| 208 kTestFunctions, | |
| 209 2); | |
| 210 } | |
| 211 | |
| 212 TEST(StreamingUtf8ValidatorPerfTest, ThreeByteRange) { | |
| 213 RunSomeTests("%s: bytes=3 ranged length=%d repeat=%d", | |
| 214 base::Bind(ConstructRangedTestString, | |
| 215 kThreeByteSeqRangeStart, | |
| 216 kThreeByteSeqRangeEnd), | |
| 217 kTestFunctions, | |
| 218 2); | |
| 219 } | |
| 220 | |
| 221 TEST(StreamingUtf8ValidatorPerfTest, FourByteRepeated) { | |
| 222 RunSomeTests("%s: bytes=4 repeated length=%d repeat=%d", | |
| 223 base::Bind(ConstructRepeatedTestString, kFourByteSeqRangeStart), | |
| 224 kTestFunctions, | |
| 225 2); | |
| 226 } | |
| 227 | |
| 228 TEST(StreamingUtf8ValidatorPerfTest, FourByteRange) { | |
| 229 RunSomeTests("%s: bytes=4 ranged length=%d repeat=%d", | |
| 230 base::Bind(ConstructRangedTestString, | |
| 231 kFourByteSeqRangeStart, | |
| 232 kFourByteSeqRangeEnd), | |
| 233 kTestFunctions, | |
| 234 2); | |
| 235 } | |
| 236 | |
| 237 } // namespace | |
| 238 } // namespace base | |
| OLD | NEW |