| OLD | NEW |
| (Empty) |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/strings/string_util.h" | |
| 6 | |
| 7 #include <math.h> | |
| 8 #include <stdarg.h> | |
| 9 | |
| 10 #include <algorithm> | |
| 11 | |
| 12 #include "base/basictypes.h" | |
| 13 #include "base/strings/string16.h" | |
| 14 #include "base/strings/utf_string_conversions.h" | |
| 15 #include "testing/gmock/include/gmock/gmock.h" | |
| 16 #include "testing/gtest/include/gtest/gtest.h" | |
| 17 | |
| 18 using ::testing::ElementsAre; | |
| 19 | |
| 20 namespace base { | |
| 21 | |
| 22 static const struct trim_case { | |
| 23 const wchar_t* input; | |
| 24 const TrimPositions positions; | |
| 25 const wchar_t* output; | |
| 26 const TrimPositions return_value; | |
| 27 } trim_cases[] = { | |
| 28 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING}, | |
| 29 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING}, | |
| 30 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL}, | |
| 31 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE}, | |
| 32 {L"", TRIM_ALL, L"", TRIM_NONE}, | |
| 33 {L" ", TRIM_LEADING, L"", TRIM_LEADING}, | |
| 34 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING}, | |
| 35 {L" ", TRIM_ALL, L"", TRIM_ALL}, | |
| 36 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL}, | |
| 37 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL}, | |
| 38 }; | |
| 39 | |
| 40 static const struct trim_case_ascii { | |
| 41 const char* input; | |
| 42 const TrimPositions positions; | |
| 43 const char* output; | |
| 44 const TrimPositions return_value; | |
| 45 } trim_cases_ascii[] = { | |
| 46 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING}, | |
| 47 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING}, | |
| 48 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL}, | |
| 49 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE}, | |
| 50 {"", TRIM_ALL, "", TRIM_NONE}, | |
| 51 {" ", TRIM_LEADING, "", TRIM_LEADING}, | |
| 52 {" ", TRIM_TRAILING, "", TRIM_TRAILING}, | |
| 53 {" ", TRIM_ALL, "", TRIM_ALL}, | |
| 54 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL}, | |
| 55 }; | |
| 56 | |
| 57 namespace { | |
| 58 | |
| 59 // Helper used to test TruncateUTF8ToByteSize. | |
| 60 bool Truncated(const std::string& input, | |
| 61 const size_t byte_size, | |
| 62 std::string* output) { | |
| 63 size_t prev = input.length(); | |
| 64 TruncateUTF8ToByteSize(input, byte_size, output); | |
| 65 return prev != output->length(); | |
| 66 } | |
| 67 | |
| 68 } // namespace | |
| 69 | |
| 70 TEST(StringUtilTest, TruncateUTF8ToByteSize) { | |
| 71 std::string output; | |
| 72 | |
| 73 // Empty strings and invalid byte_size arguments | |
| 74 EXPECT_FALSE(Truncated(std::string(), 0, &output)); | |
| 75 EXPECT_EQ(output, ""); | |
| 76 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output)); | |
| 77 EXPECT_EQ(output, ""); | |
| 78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output)); | |
| 79 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output)); | |
| 80 | |
| 81 // Testing the truncation of valid UTF8 correctly | |
| 82 EXPECT_TRUE(Truncated("abc", 2, &output)); | |
| 83 EXPECT_EQ(output, "ab"); | |
| 84 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output)); | |
| 85 EXPECT_EQ(output.compare("\xc2\x81"), 0); | |
| 86 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output)); | |
| 87 EXPECT_EQ(output.compare("\xc2\x81"), 0); | |
| 88 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output)); | |
| 89 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0); | |
| 90 | |
| 91 { | |
| 92 const char array[] = "\x00\x00\xc2\x81\xc2\x81"; | |
| 93 const std::string array_string(array, arraysize(array)); | |
| 94 EXPECT_TRUE(Truncated(array_string, 4, &output)); | |
| 95 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0); | |
| 96 } | |
| 97 | |
| 98 { | |
| 99 const char array[] = "\x00\xc2\x81\xc2\x81"; | |
| 100 const std::string array_string(array, arraysize(array)); | |
| 101 EXPECT_TRUE(Truncated(array_string, 4, &output)); | |
| 102 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0); | |
| 103 } | |
| 104 | |
| 105 // Testing invalid UTF8 | |
| 106 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output)); | |
| 107 EXPECT_EQ(output.compare(""), 0); | |
| 108 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output)); | |
| 109 EXPECT_EQ(output.compare(""), 0); | |
| 110 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output)); | |
| 111 EXPECT_EQ(output.compare(""), 0); | |
| 112 | |
| 113 // Testing invalid UTF8 mixed with valid UTF8 | |
| 114 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output)); | |
| 115 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0); | |
| 116 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output)); | |
| 117 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0); | |
| 118 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", | |
| 119 10, &output)); | |
| 120 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0); | |
| 121 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0", | |
| 122 10, &output)); | |
| 123 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0); | |
| 124 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output)); | |
| 125 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0); | |
| 126 | |
| 127 // Overlong sequences | |
| 128 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output)); | |
| 129 EXPECT_EQ(output.compare(""), 0); | |
| 130 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output)); | |
| 131 EXPECT_EQ(output.compare(""), 0); | |
| 132 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output)); | |
| 133 EXPECT_EQ(output.compare(""), 0); | |
| 134 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output)); | |
| 135 EXPECT_EQ(output.compare(""), 0); | |
| 136 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output)); | |
| 137 EXPECT_EQ(output.compare(""), 0); | |
| 138 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output)); | |
| 139 EXPECT_EQ(output.compare(""), 0); | |
| 140 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output)); | |
| 141 EXPECT_EQ(output.compare(""), 0); | |
| 142 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output)); | |
| 143 EXPECT_EQ(output.compare(""), 0); | |
| 144 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output)); | |
| 145 EXPECT_EQ(output.compare(""), 0); | |
| 146 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output)); | |
| 147 EXPECT_EQ(output.compare(""), 0); | |
| 148 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output)); | |
| 149 EXPECT_EQ(output.compare(""), 0); | |
| 150 | |
| 151 // Beyond U+10FFFF (the upper limit of Unicode codespace) | |
| 152 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output)); | |
| 153 EXPECT_EQ(output.compare(""), 0); | |
| 154 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output)); | |
| 155 EXPECT_EQ(output.compare(""), 0); | |
| 156 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output)); | |
| 157 EXPECT_EQ(output.compare(""), 0); | |
| 158 | |
| 159 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) | |
| 160 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output)); | |
| 161 EXPECT_EQ(output.compare(""), 0); | |
| 162 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output)); | |
| 163 EXPECT_EQ(output.compare(""), 0); | |
| 164 | |
| 165 { | |
| 166 const char array[] = "\x00\x00\xfe\xff"; | |
| 167 const std::string array_string(array, arraysize(array)); | |
| 168 EXPECT_TRUE(Truncated(array_string, 4, &output)); | |
| 169 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0); | |
| 170 } | |
| 171 | |
| 172 // Variants on the previous test | |
| 173 { | |
| 174 const char array[] = "\xff\xfe\x00\x00"; | |
| 175 const std::string array_string(array, 4); | |
| 176 EXPECT_FALSE(Truncated(array_string, 4, &output)); | |
| 177 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0); | |
| 178 } | |
| 179 { | |
| 180 const char array[] = "\xff\x00\x00\xfe"; | |
| 181 const std::string array_string(array, arraysize(array)); | |
| 182 EXPECT_TRUE(Truncated(array_string, 4, &output)); | |
| 183 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0); | |
| 184 } | |
| 185 | |
| 186 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> | |
| 187 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output)); | |
| 188 EXPECT_EQ(output.compare(""), 0); | |
| 189 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output)); | |
| 190 EXPECT_EQ(output.compare(""), 0); | |
| 191 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output)); | |
| 192 EXPECT_EQ(output.compare(""), 0); | |
| 193 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output)); | |
| 194 EXPECT_EQ(output.compare(""), 0); | |
| 195 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output)); | |
| 196 EXPECT_EQ(output.compare(""), 0); | |
| 197 | |
| 198 // Strings in legacy encodings that are valid in UTF-8, but | |
| 199 // are invalid as UTF-8 in real data. | |
| 200 EXPECT_TRUE(Truncated("caf\xe9", 4, &output)); | |
| 201 EXPECT_EQ(output.compare("caf"), 0); | |
| 202 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output)); | |
| 203 EXPECT_EQ(output.compare(""), 0); | |
| 204 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output)); | |
| 205 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); | |
| 206 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7, | |
| 207 &output)); | |
| 208 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); | |
| 209 | |
| 210 // Testing using the same string as input and output. | |
| 211 EXPECT_FALSE(Truncated(output, 4, &output)); | |
| 212 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); | |
| 213 EXPECT_TRUE(Truncated(output, 3, &output)); | |
| 214 EXPECT_EQ(output.compare("\xa7\x41"), 0); | |
| 215 | |
| 216 // "abc" with U+201[CD] in windows-125[0-8] | |
| 217 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output)); | |
| 218 EXPECT_EQ(output.compare("\x93" "abc"), 0); | |
| 219 | |
| 220 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 | |
| 221 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output)); | |
| 222 EXPECT_EQ(output.compare(""), 0); | |
| 223 | |
| 224 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 | |
| 225 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output)); | |
| 226 EXPECT_EQ(output.compare(""), 0); | |
| 227 } | |
| 228 | |
| 229 TEST(StringUtilTest, TrimWhitespace) { | |
| 230 string16 output; // Allow contents to carry over to next testcase | |
| 231 for (size_t i = 0; i < arraysize(trim_cases); ++i) { | |
| 232 const trim_case& value = trim_cases[i]; | |
| 233 EXPECT_EQ(value.return_value, | |
| 234 TrimWhitespace(WideToUTF16(value.input), value.positions, | |
| 235 &output)); | |
| 236 EXPECT_EQ(WideToUTF16(value.output), output); | |
| 237 } | |
| 238 | |
| 239 // Test that TrimWhitespace() can take the same string for input and output | |
| 240 output = ASCIIToUTF16(" This is a test \r\n"); | |
| 241 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); | |
| 242 EXPECT_EQ(ASCIIToUTF16("This is a test"), output); | |
| 243 | |
| 244 // Once more, but with a string of whitespace | |
| 245 output = ASCIIToUTF16(" \r\n"); | |
| 246 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); | |
| 247 EXPECT_EQ(string16(), output); | |
| 248 | |
| 249 std::string output_ascii; | |
| 250 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) { | |
| 251 const trim_case_ascii& value = trim_cases_ascii[i]; | |
| 252 EXPECT_EQ(value.return_value, | |
| 253 TrimWhitespace(value.input, value.positions, &output_ascii)); | |
| 254 EXPECT_EQ(value.output, output_ascii); | |
| 255 } | |
| 256 } | |
| 257 | |
// One expectation for collapsing whitespace in wide-character text: the text
// to process, the boolean flag passed along with it, and the expected result.
struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
};

static const collapse_case collapse_cases[] = {
    // Flag cleared.
    {L" Google Video ", false, L"Google Video"},
    {L"Google Video", false, L"Google Video"},
    {L"", false, L""},
    {L" ", false, L""},
    {L"\t\rTest String\n", false, L"Test String"},
    {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    {L" Test \n \t String ", false, L"Test String"},
    {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    {L" Test String", false, L"Test String"},
    {L"Test String ", false, L"Test String"},
    {L"Test String", false, L"Test String"},
    // Flag set.
    {L"", true, L""},
    {L"\n", true, L""},
    {L" \r ", true, L""},
    {L"\nFoo", true, L"Foo"},
    {L"\r Foo ", true, L"Foo"},
    {L" Foo bar ", true, L"Foo bar"},
    {L" \tFoo bar \n", true, L"Foo bar"},
    {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
| 283 | |
| 284 TEST(StringUtilTest, CollapseWhitespace) { | |
| 285 for (size_t i = 0; i < arraysize(collapse_cases); ++i) { | |
| 286 const collapse_case& value = collapse_cases[i]; | |
| 287 EXPECT_EQ(WideToUTF16(value.output), | |
| 288 CollapseWhitespace(WideToUTF16(value.input), value.trim)); | |
| 289 } | |
| 290 } | |
| 291 | |
// Narrow-string counterpart of the wide collapsing expectations: the text to
// process, the boolean flag passed along with it, and the expected result.
struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
};

static const collapse_case_ascii collapse_cases_ascii[] = {
    // Flag cleared.
    {" Google Video ", false, "Google Video"},
    {"Google Video", false, "Google Video"},
    {"", false, ""},
    {" ", false, ""},
    {"\t\rTest String\n", false, "Test String"},
    {" Test \n \t String ", false, "Test String"},
    {" Test String", false, "Test String"},
    {"Test String ", false, "Test String"},
    {"Test String", false, "Test String"},
    // Flag set.
    {"", true, ""},
    {"\n", true, ""},
    {" \r ", true, ""},
    {"\nFoo", true, "Foo"},
    {"\r Foo ", true, "Foo"},
    {" Foo bar ", true, "Foo bar"},
    {" \tFoo bar \n", true, "Foo bar"},
    {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
| 315 | |
| 316 TEST(StringUtilTest, CollapseWhitespaceASCII) { | |
| 317 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) { | |
| 318 const collapse_case_ascii& value = collapse_cases_ascii[i]; | |
| 319 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim)); | |
| 320 } | |
| 321 } | |
| 322 | |
| 323 TEST(StringUtilTest, IsStringUTF8) { | |
| 324 EXPECT_TRUE(IsStringUTF8("abc")); | |
| 325 EXPECT_TRUE(IsStringUTF8("\xc2\x81")); | |
| 326 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf")); | |
| 327 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf")); | |
| 328 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf")); | |
| 329 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM | |
| 330 | |
| 331 // surrogate code points | |
| 332 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf")); | |
| 333 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f")); | |
| 334 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf")); | |
| 335 | |
| 336 // overlong sequences | |
| 337 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000 | |
| 338 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB" | |
| 339 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000 | |
| 340 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080 | |
| 341 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff | |
| 342 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D | |
| 343 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091 | |
| 344 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800 | |
| 345 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM) | |
| 346 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F | |
| 347 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5 | |
| 348 | |
| 349 // Beyond U+10FFFF (the upper limit of Unicode codespace) | |
| 350 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000 | |
| 351 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes | |
| 352 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes | |
| 353 | |
| 354 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) | |
| 355 EXPECT_FALSE(IsStringUTF8("\xfe\xff")); | |
| 356 EXPECT_FALSE(IsStringUTF8("\xff\xfe")); | |
| 357 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4))); | |
| 358 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00")); | |
| 359 | |
| 360 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> | |
| 361 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE) | |
| 362 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE | |
| 363 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF | |
| 364 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 | |
| 365 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF | |
| 366 // Strings in legacy encodings. We can certainly make up strings | |
| 367 // in a legacy encoding that are valid in UTF-8, but in real data, | |
| 368 // most of them are invalid as UTF-8. | |
| 369 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 | |
| 370 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR | |
| 371 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 | |
| 372 // "abc" with U+201[CD] in windows-125[0-8] | |
| 373 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94")); | |
| 374 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 | |
| 375 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee")); | |
| 376 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 | |
| 377 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC")); | |
| 378 | |
| 379 // Check that we support Embedded Nulls. The first uses the canonical UTF-8 | |
| 380 // representation, and the second uses a 2-byte sequence. The second version | |
| 381 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a | |
| 382 // given codepoint must be used. | |
| 383 static const char kEmbeddedNull[] = "embedded\0null"; | |
| 384 EXPECT_TRUE(IsStringUTF8( | |
| 385 std::string(kEmbeddedNull, sizeof(kEmbeddedNull)))); | |
| 386 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000")); | |
| 387 } | |
| 388 | |
| 389 TEST(StringUtilTest, IsStringASCII) { | |
| 390 static char char_ascii[] = | |
| 391 "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"; | |
| 392 static char16 char16_ascii[] = { | |
| 393 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A', | |
| 394 'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6', | |
| 395 '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 }; | |
| 396 static std::wstring wchar_ascii( | |
| 397 L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"); | |
| 398 | |
| 399 // Test a variety of the fragment start positions and lengths in order to make | |
| 400 // sure that bit masking in IsStringASCII works correctly. | |
| 401 // Also, test that a non-ASCII character will be detected regardless of its | |
| 402 // position inside the string. | |
| 403 { | |
| 404 const size_t string_length = arraysize(char_ascii) - 1; | |
| 405 for (size_t offset = 0; offset < 8; ++offset) { | |
| 406 for (size_t len = 0, max_len = string_length - offset; len < max_len; | |
| 407 ++len) { | |
| 408 EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len))); | |
| 409 for (size_t char_pos = offset; char_pos < len; ++char_pos) { | |
| 410 char_ascii[char_pos] |= '\x80'; | |
| 411 EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len))); | |
| 412 char_ascii[char_pos] &= ~'\x80'; | |
| 413 } | |
| 414 } | |
| 415 } | |
| 416 } | |
| 417 | |
| 418 { | |
| 419 const size_t string_length = arraysize(char16_ascii) - 1; | |
| 420 for (size_t offset = 0; offset < 4; ++offset) { | |
| 421 for (size_t len = 0, max_len = string_length - offset; len < max_len; | |
| 422 ++len) { | |
| 423 EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len))); | |
| 424 for (size_t char_pos = offset; char_pos < len; ++char_pos) { | |
| 425 char16_ascii[char_pos] |= 0x80; | |
| 426 EXPECT_FALSE( | |
| 427 IsStringASCII(StringPiece16(char16_ascii + offset, len))); | |
| 428 char16_ascii[char_pos] &= ~0x80; | |
| 429 // Also test when the upper half is non-zero. | |
| 430 char16_ascii[char_pos] |= 0x100; | |
| 431 EXPECT_FALSE( | |
| 432 IsStringASCII(StringPiece16(char16_ascii + offset, len))); | |
| 433 char16_ascii[char_pos] &= ~0x100; | |
| 434 } | |
| 435 } | |
| 436 } | |
| 437 } | |
| 438 | |
| 439 { | |
| 440 const size_t string_length = wchar_ascii.length(); | |
| 441 for (size_t len = 0; len < string_length; ++len) { | |
| 442 EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len))); | |
| 443 for (size_t char_pos = 0; char_pos < len; ++char_pos) { | |
| 444 wchar_ascii[char_pos] |= 0x80; | |
| 445 EXPECT_FALSE( | |
| 446 IsStringASCII(wchar_ascii.substr(0, len))); | |
| 447 wchar_ascii[char_pos] &= ~0x80; | |
| 448 wchar_ascii[char_pos] |= 0x100; | |
| 449 EXPECT_FALSE( | |
| 450 IsStringASCII(wchar_ascii.substr(0, len))); | |
| 451 wchar_ascii[char_pos] &= ~0x100; | |
| 452 #if defined(WCHAR_T_IS_UTF32) | |
| 453 wchar_ascii[char_pos] |= 0x10000; | |
| 454 EXPECT_FALSE( | |
| 455 IsStringASCII(wchar_ascii.substr(0, len))); | |
| 456 wchar_ascii[char_pos] &= ~0x10000; | |
| 457 #endif // WCHAR_T_IS_UTF32 | |
| 458 } | |
| 459 } | |
| 460 } | |
| 461 } | |
| 462 | |
| 463 TEST(StringUtilTest, ConvertASCII) { | |
| 464 static const char* const char_cases[] = { | |
| 465 "Google Video", | |
| 466 "Hello, world\n", | |
| 467 "0123ABCDwxyz \a\b\t\r\n!+,.~" | |
| 468 }; | |
| 469 | |
| 470 static const wchar_t* const wchar_cases[] = { | |
| 471 L"Google Video", | |
| 472 L"Hello, world\n", | |
| 473 L"0123ABCDwxyz \a\b\t\r\n!+,.~" | |
| 474 }; | |
| 475 | |
| 476 for (size_t i = 0; i < arraysize(char_cases); ++i) { | |
| 477 EXPECT_TRUE(IsStringASCII(char_cases[i])); | |
| 478 string16 utf16 = ASCIIToUTF16(char_cases[i]); | |
| 479 EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16); | |
| 480 | |
| 481 std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i])); | |
| 482 EXPECT_EQ(char_cases[i], ascii); | |
| 483 } | |
| 484 | |
| 485 EXPECT_FALSE(IsStringASCII("Google \x80Video")); | |
| 486 | |
| 487 // Convert empty strings. | |
| 488 string16 empty16; | |
| 489 std::string empty; | |
| 490 EXPECT_EQ(empty, UTF16ToASCII(empty16)); | |
| 491 EXPECT_EQ(empty16, ASCIIToUTF16(empty)); | |
| 492 | |
| 493 // Convert strings with an embedded NUL character. | |
| 494 const char chars_with_nul[] = "test\0string"; | |
| 495 const int length_with_nul = arraysize(chars_with_nul) - 1; | |
| 496 std::string string_with_nul(chars_with_nul, length_with_nul); | |
| 497 string16 string16_with_nul = ASCIIToUTF16(string_with_nul); | |
| 498 EXPECT_EQ(static_cast<string16::size_type>(length_with_nul), | |
| 499 string16_with_nul.length()); | |
| 500 std::string narrow_with_nul = UTF16ToASCII(string16_with_nul); | |
| 501 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul), | |
| 502 narrow_with_nul.length()); | |
| 503 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul)); | |
| 504 } | |
| 505 | |
| 506 TEST(StringUtilTest, ToUpperASCII) { | |
| 507 EXPECT_EQ('C', ToUpperASCII('C')); | |
| 508 EXPECT_EQ('C', ToUpperASCII('c')); | |
| 509 EXPECT_EQ('2', ToUpperASCII('2')); | |
| 510 | |
| 511 EXPECT_EQ(L'C', ToUpperASCII(L'C')); | |
| 512 EXPECT_EQ(L'C', ToUpperASCII(L'c')); | |
| 513 EXPECT_EQ(L'2', ToUpperASCII(L'2')); | |
| 514 | |
| 515 std::string in_place_a("Cc2"); | |
| 516 StringToUpperASCII(&in_place_a); | |
| 517 EXPECT_EQ("CC2", in_place_a); | |
| 518 | |
| 519 std::wstring in_place_w(L"Cc2"); | |
| 520 StringToUpperASCII(&in_place_w); | |
| 521 EXPECT_EQ(L"CC2", in_place_w); | |
| 522 | |
| 523 std::string original_a("Cc2"); | |
| 524 std::string upper_a = StringToUpperASCII(original_a); | |
| 525 EXPECT_EQ("CC2", upper_a); | |
| 526 | |
| 527 std::wstring original_w(L"Cc2"); | |
| 528 std::wstring upper_w = StringToUpperASCII(original_w); | |
| 529 EXPECT_EQ(L"CC2", upper_w); | |
| 530 } | |
| 531 | |
| 532 TEST(StringUtilTest, LowerCaseEqualsASCII) { | |
| 533 static const struct { | |
| 534 const char* src_a; | |
| 535 const char* dst; | |
| 536 } lowercase_cases[] = { | |
| 537 { "FoO", "foo" }, | |
| 538 { "foo", "foo" }, | |
| 539 { "FOO", "foo" }, | |
| 540 }; | |
| 541 | |
| 542 for (size_t i = 0; i < arraysize(lowercase_cases); ++i) { | |
| 543 EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a), | |
| 544 lowercase_cases[i].dst)); | |
| 545 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a, | |
| 546 lowercase_cases[i].dst)); | |
| 547 } | |
| 548 } | |
| 549 | |
| 550 TEST(StringUtilTest, FormatBytesUnlocalized) { | |
| 551 static const struct { | |
| 552 int64 bytes; | |
| 553 const char* expected; | |
| 554 } cases[] = { | |
| 555 // Expected behavior: we show one post-decimal digit when we have | |
| 556 // under two pre-decimal digits, except in cases where it makes no | |
| 557 // sense (zero or bytes). | |
| 558 // Since we switch units once we cross the 1000 mark, this keeps | |
| 559 // the display of file sizes or bytes consistently around three | |
| 560 // digits. | |
| 561 {0, "0 B"}, | |
| 562 {512, "512 B"}, | |
| 563 {1024*1024, "1.0 MB"}, | |
| 564 {1024*1024*1024, "1.0 GB"}, | |
| 565 {10LL*1024*1024*1024, "10.0 GB"}, | |
| 566 {99LL*1024*1024*1024, "99.0 GB"}, | |
| 567 {105LL*1024*1024*1024, "105 GB"}, | |
| 568 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"}, | |
| 569 {~(1LL << 63), "8192 PB"}, | |
| 570 | |
| 571 {99*1024 + 103, "99.1 kB"}, | |
| 572 {1024*1024 + 103, "1.0 MB"}, | |
| 573 {1024*1024 + 205 * 1024, "1.2 MB"}, | |
| 574 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"}, | |
| 575 {10LL*1024*1024*1024, "10.0 GB"}, | |
| 576 {100LL*1024*1024*1024, "100 GB"}, | |
| 577 }; | |
| 578 | |
| 579 for (size_t i = 0; i < arraysize(cases); ++i) { | |
| 580 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), | |
| 581 FormatBytesUnlocalized(cases[i].bytes)); | |
| 582 } | |
| 583 } | |
| 584 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) { | |
| 585 static const struct { | |
| 586 const char* str; | |
| 587 string16::size_type start_offset; | |
| 588 const char* find_this; | |
| 589 const char* replace_with; | |
| 590 const char* expected; | |
| 591 } cases[] = { | |
| 592 {"aaa", 0, "a", "b", "bbb"}, | |
| 593 {"abb", 0, "ab", "a", "ab"}, | |
| 594 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "}, | |
| 595 {"Not found", 0, "x", "0", "Not found"}, | |
| 596 {"Not found again", 5, "x", "0", "Not found again"}, | |
| 597 {" Making it much longer ", 0, " ", "Four score and seven years ago", | |
| 598 "Four score and seven years agoMakingFour score and seven years agoit" | |
| 599 "Four score and seven years agomuchFour score and seven years agolonger" | |
| 600 "Four score and seven years ago"}, | |
| 601 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, | |
| 602 {"Replace me only me once", 9, "me ", "", "Replace me only once"}, | |
| 603 {"abababab", 2, "ab", "c", "abccc"}, | |
| 604 }; | |
| 605 | |
| 606 for (size_t i = 0; i < arraysize(cases); i++) { | |
| 607 string16 str = ASCIIToUTF16(cases[i].str); | |
| 608 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset, | |
| 609 ASCIIToUTF16(cases[i].find_this), | |
| 610 ASCIIToUTF16(cases[i].replace_with)); | |
| 611 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); | |
| 612 } | |
| 613 } | |
| 614 | |
| 615 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) { | |
| 616 static const struct { | |
| 617 const char* str; | |
| 618 string16::size_type start_offset; | |
| 619 const char* find_this; | |
| 620 const char* replace_with; | |
| 621 const char* expected; | |
| 622 } cases[] = { | |
| 623 {"aaa", 0, "a", "b", "baa"}, | |
| 624 {"abb", 0, "ab", "a", "ab"}, | |
| 625 {"Removing some substrings inging", 0, "ing", "", | |
| 626 "Remov some substrings inging"}, | |
| 627 {"Not found", 0, "x", "0", "Not found"}, | |
| 628 {"Not found again", 5, "x", "0", "Not found again"}, | |
| 629 {" Making it much longer ", 0, " ", "Four score and seven years ago", | |
| 630 "Four score and seven years agoMaking it much longer "}, | |
| 631 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, | |
| 632 {"Replace me only me once", 4, "me ", "", "Replace only me once"}, | |
| 633 {"abababab", 2, "ab", "c", "abcabab"}, | |
| 634 }; | |
| 635 | |
| 636 for (size_t i = 0; i < arraysize(cases); i++) { | |
| 637 string16 str = ASCIIToUTF16(cases[i].str); | |
| 638 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset, | |
| 639 ASCIIToUTF16(cases[i].find_this), | |
| 640 ASCIIToUTF16(cases[i].replace_with)); | |
| 641 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); | |
| 642 } | |
| 643 } | |
| 644 | |
| 645 TEST(StringUtilTest, HexDigitToInt) { | |
| 646 EXPECT_EQ(0, HexDigitToInt('0')); | |
| 647 EXPECT_EQ(1, HexDigitToInt('1')); | |
| 648 EXPECT_EQ(2, HexDigitToInt('2')); | |
| 649 EXPECT_EQ(3, HexDigitToInt('3')); | |
| 650 EXPECT_EQ(4, HexDigitToInt('4')); | |
| 651 EXPECT_EQ(5, HexDigitToInt('5')); | |
| 652 EXPECT_EQ(6, HexDigitToInt('6')); | |
| 653 EXPECT_EQ(7, HexDigitToInt('7')); | |
| 654 EXPECT_EQ(8, HexDigitToInt('8')); | |
| 655 EXPECT_EQ(9, HexDigitToInt('9')); | |
| 656 EXPECT_EQ(10, HexDigitToInt('A')); | |
| 657 EXPECT_EQ(11, HexDigitToInt('B')); | |
| 658 EXPECT_EQ(12, HexDigitToInt('C')); | |
| 659 EXPECT_EQ(13, HexDigitToInt('D')); | |
| 660 EXPECT_EQ(14, HexDigitToInt('E')); | |
| 661 EXPECT_EQ(15, HexDigitToInt('F')); | |
| 662 | |
| 663 // Verify the lower case as well. | |
| 664 EXPECT_EQ(10, HexDigitToInt('a')); | |
| 665 EXPECT_EQ(11, HexDigitToInt('b')); | |
| 666 EXPECT_EQ(12, HexDigitToInt('c')); | |
| 667 EXPECT_EQ(13, HexDigitToInt('d')); | |
| 668 EXPECT_EQ(14, HexDigitToInt('e')); | |
| 669 EXPECT_EQ(15, HexDigitToInt('f')); | |
| 670 } | |
| 671 | |
| 672 TEST(StringUtilTest, JoinString) { | |
| 673 std::string separator(", "); | |
| 674 std::vector<std::string> parts; | |
| 675 EXPECT_EQ(std::string(), JoinString(parts, separator)); | |
| 676 | |
| 677 parts.push_back("a"); | |
| 678 EXPECT_EQ("a", JoinString(parts, separator)); | |
| 679 | |
| 680 parts.push_back("b"); | |
| 681 parts.push_back("c"); | |
| 682 EXPECT_EQ("a, b, c", JoinString(parts, separator)); | |
| 683 | |
| 684 parts.push_back(std::string()); | |
| 685 EXPECT_EQ("a, b, c, ", JoinString(parts, separator)); | |
| 686 parts.push_back(" "); | |
| 687 EXPECT_EQ("a|b|c|| ", JoinString(parts, "|")); | |
| 688 } | |
| 689 | |
| 690 TEST(StringUtilTest, JoinString16) { | |
| 691 string16 separator = ASCIIToUTF16(", "); | |
| 692 std::vector<string16> parts; | |
| 693 EXPECT_EQ(string16(), JoinString(parts, separator)); | |
| 694 | |
| 695 parts.push_back(ASCIIToUTF16("a")); | |
| 696 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator)); | |
| 697 | |
| 698 parts.push_back(ASCIIToUTF16("b")); | |
| 699 parts.push_back(ASCIIToUTF16("c")); | |
| 700 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator)); | |
| 701 | |
| 702 parts.push_back(ASCIIToUTF16("")); | |
| 703 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator)); | |
| 704 parts.push_back(ASCIIToUTF16(" ")); | |
| 705 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|"))); | |
| 706 } | |
| 707 | |
| 708 TEST(StringUtilTest, StartsWith) { | |
| 709 EXPECT_TRUE( | |
| 710 StartsWith("javascript:url", "javascript", base::CompareCase::SENSITIVE)); | |
| 711 EXPECT_FALSE( | |
| 712 StartsWith("JavaScript:url", "javascript", base::CompareCase::SENSITIVE)); | |
| 713 EXPECT_TRUE(StartsWith("javascript:url", "javascript", | |
| 714 base::CompareCase::INSENSITIVE_ASCII)); | |
| 715 EXPECT_TRUE(StartsWith("JavaScript:url", "javascript", | |
| 716 base::CompareCase::INSENSITIVE_ASCII)); | |
| 717 EXPECT_FALSE(StartsWith("java", "javascript", base::CompareCase::SENSITIVE)); | |
| 718 EXPECT_FALSE( | |
| 719 StartsWith("java", "javascript", base::CompareCase::INSENSITIVE_ASCII)); | |
| 720 EXPECT_FALSE(StartsWith(std::string(), "javascript", | |
| 721 base::CompareCase::INSENSITIVE_ASCII)); | |
| 722 EXPECT_FALSE( | |
| 723 StartsWith(std::string(), "javascript", base::CompareCase::SENSITIVE)); | |
| 724 EXPECT_TRUE( | |
| 725 StartsWith("java", std::string(), base::CompareCase::INSENSITIVE_ASCII)); | |
| 726 EXPECT_TRUE(StartsWith("java", std::string(), base::CompareCase::SENSITIVE)); | |
| 727 | |
| 728 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"), | |
| 729 ASCIIToUTF16("javascript"), | |
| 730 base::CompareCase::SENSITIVE)); | |
| 731 EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"), | |
| 732 ASCIIToUTF16("javascript"), | |
| 733 base::CompareCase::SENSITIVE)); | |
| 734 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"), | |
| 735 ASCIIToUTF16("javascript"), | |
| 736 base::CompareCase::INSENSITIVE_ASCII)); | |
| 737 EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"), | |
| 738 ASCIIToUTF16("javascript"), | |
| 739 base::CompareCase::INSENSITIVE_ASCII)); | |
| 740 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), ASCIIToUTF16("javascript"), | |
| 741 base::CompareCase::SENSITIVE)); | |
| 742 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), ASCIIToUTF16("javascript"), | |
| 743 base::CompareCase::INSENSITIVE_ASCII)); | |
| 744 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), | |
| 745 base::CompareCase::INSENSITIVE_ASCII)); | |
| 746 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), | |
| 747 base::CompareCase::SENSITIVE)); | |
| 748 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), | |
| 749 base::CompareCase::INSENSITIVE_ASCII)); | |
| 750 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), | |
| 751 base::CompareCase::SENSITIVE)); | |
| 752 } | |
| 753 | |
| 754 TEST(StringUtilTest, EndsWith) { | |
| 755 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), ASCIIToUTF16(".plugin"), | |
| 756 base::CompareCase::SENSITIVE)); | |
| 757 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"), ASCIIToUTF16(".plugin"), | |
| 758 base::CompareCase::SENSITIVE)); | |
| 759 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), ASCIIToUTF16(".plugin"), | |
| 760 base::CompareCase::INSENSITIVE_ASCII)); | |
| 761 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"), ASCIIToUTF16(".plugin"), | |
| 762 base::CompareCase::INSENSITIVE_ASCII)); | |
| 763 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), | |
| 764 base::CompareCase::SENSITIVE)); | |
| 765 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), | |
| 766 base::CompareCase::INSENSITIVE_ASCII)); | |
| 767 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), ASCIIToUTF16(".plugin"), | |
| 768 base::CompareCase::SENSITIVE)); | |
| 769 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), ASCIIToUTF16(".plugin"), | |
| 770 base::CompareCase::INSENSITIVE_ASCII)); | |
| 771 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), | |
| 772 base::CompareCase::INSENSITIVE_ASCII)); | |
| 773 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), | |
| 774 base::CompareCase::SENSITIVE)); | |
| 775 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), | |
| 776 base::CompareCase::INSENSITIVE_ASCII)); | |
| 777 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), | |
| 778 base::CompareCase::SENSITIVE)); | |
| 779 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), | |
| 780 base::CompareCase::INSENSITIVE_ASCII)); | |
| 781 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), | |
| 782 base::CompareCase::SENSITIVE)); | |
| 783 EXPECT_TRUE( | |
| 784 EndsWith(string16(), string16(), base::CompareCase::INSENSITIVE_ASCII)); | |
| 785 EXPECT_TRUE(EndsWith(string16(), string16(), base::CompareCase::SENSITIVE)); | |
| 786 } | |
| 787 | |
| 788 TEST(StringUtilTest, GetStringFWithOffsets) { | |
| 789 std::vector<string16> subst; | |
| 790 subst.push_back(ASCIIToUTF16("1")); | |
| 791 subst.push_back(ASCIIToUTF16("2")); | |
| 792 std::vector<size_t> offsets; | |
| 793 | |
| 794 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."), | |
| 795 subst, | |
| 796 &offsets); | |
| 797 EXPECT_EQ(2U, offsets.size()); | |
| 798 EXPECT_EQ(7U, offsets[0]); | |
| 799 EXPECT_EQ(25U, offsets[1]); | |
| 800 offsets.clear(); | |
| 801 | |
| 802 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."), | |
| 803 subst, | |
| 804 &offsets); | |
| 805 EXPECT_EQ(2U, offsets.size()); | |
| 806 EXPECT_EQ(25U, offsets[0]); | |
| 807 EXPECT_EQ(7U, offsets[1]); | |
| 808 offsets.clear(); | |
| 809 } | |
| 810 | |
| 811 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) { | |
| 812 // Test whether replacestringplaceholders works as expected when there | |
| 813 // are fewer inputs than outputs. | |
| 814 std::vector<string16> subst; | |
| 815 subst.push_back(ASCIIToUTF16("9a")); | |
| 816 subst.push_back(ASCIIToUTF16("8b")); | |
| 817 subst.push_back(ASCIIToUTF16("7c")); | |
| 818 | |
| 819 string16 formatted = | |
| 820 ReplaceStringPlaceholders( | |
| 821 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL); | |
| 822 | |
| 823 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci")); | |
| 824 } | |
| 825 | |
| 826 TEST(StringUtilTest, ReplaceStringPlaceholders) { | |
| 827 std::vector<string16> subst; | |
| 828 subst.push_back(ASCIIToUTF16("9a")); | |
| 829 subst.push_back(ASCIIToUTF16("8b")); | |
| 830 subst.push_back(ASCIIToUTF16("7c")); | |
| 831 subst.push_back(ASCIIToUTF16("6d")); | |
| 832 subst.push_back(ASCIIToUTF16("5e")); | |
| 833 subst.push_back(ASCIIToUTF16("4f")); | |
| 834 subst.push_back(ASCIIToUTF16("3g")); | |
| 835 subst.push_back(ASCIIToUTF16("2h")); | |
| 836 subst.push_back(ASCIIToUTF16("1i")); | |
| 837 | |
| 838 string16 formatted = | |
| 839 ReplaceStringPlaceholders( | |
| 840 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL); | |
| 841 | |
| 842 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii")); | |
| 843 } | |
| 844 | |
| 845 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) { | |
| 846 std::vector<string16> subst; | |
| 847 subst.push_back(ASCIIToUTF16("9a")); | |
| 848 subst.push_back(ASCIIToUTF16("8b")); | |
| 849 subst.push_back(ASCIIToUTF16("7c")); | |
| 850 subst.push_back(ASCIIToUTF16("6d")); | |
| 851 subst.push_back(ASCIIToUTF16("5e")); | |
| 852 subst.push_back(ASCIIToUTF16("4f")); | |
| 853 subst.push_back(ASCIIToUTF16("3g")); | |
| 854 subst.push_back(ASCIIToUTF16("2h")); | |
| 855 subst.push_back(ASCIIToUTF16("1i")); | |
| 856 subst.push_back(ASCIIToUTF16("0j")); | |
| 857 subst.push_back(ASCIIToUTF16("-1k")); | |
| 858 subst.push_back(ASCIIToUTF16("-2l")); | |
| 859 subst.push_back(ASCIIToUTF16("-3m")); | |
| 860 subst.push_back(ASCIIToUTF16("-4n")); | |
| 861 | |
| 862 string16 formatted = | |
| 863 ReplaceStringPlaceholders( | |
| 864 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i," | |
| 865 "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL); | |
| 866 | |
| 867 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh," | |
| 868 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a")); | |
| 869 } | |
| 870 | |
| 871 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) { | |
| 872 std::vector<std::string> subst; | |
| 873 subst.push_back("9a"); | |
| 874 subst.push_back("8b"); | |
| 875 subst.push_back("7c"); | |
| 876 subst.push_back("6d"); | |
| 877 subst.push_back("5e"); | |
| 878 subst.push_back("4f"); | |
| 879 subst.push_back("3g"); | |
| 880 subst.push_back("2h"); | |
| 881 subst.push_back("1i"); | |
| 882 | |
| 883 std::string formatted = | |
| 884 ReplaceStringPlaceholders( | |
| 885 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL); | |
| 886 | |
| 887 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"); | |
| 888 } | |
| 889 | |
| 890 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) { | |
| 891 std::vector<std::string> subst; | |
| 892 subst.push_back("a"); | |
| 893 subst.push_back("b"); | |
| 894 subst.push_back("c"); | |
| 895 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL), | |
| 896 "$1 $$2 $$$3"); | |
| 897 } | |
| 898 | |
| 899 TEST(StringUtilTest, LcpyTest) { | |
| 900 // Test the normal case where we fit in our buffer. | |
| 901 { | |
| 902 char dst[10]; | |
| 903 wchar_t wdst[10]; | |
| 904 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst))); | |
| 905 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); | |
| 906 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst))); | |
| 907 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); | |
| 908 } | |
| 909 | |
| 910 // Test dst_size == 0, nothing should be written to |dst| and we should | |
| 911 // have the equivalent of strlen(src). | |
| 912 { | |
| 913 char dst[2] = {1, 2}; | |
| 914 wchar_t wdst[2] = {1, 2}; | |
| 915 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", 0)); | |
| 916 EXPECT_EQ(1, dst[0]); | |
| 917 EXPECT_EQ(2, dst[1]); | |
| 918 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", 0)); | |
| 919 EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]); | |
| 920 EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]); | |
| 921 } | |
| 922 | |
| 923 // Test the case were we _just_ competely fit including the null. | |
| 924 { | |
| 925 char dst[8]; | |
| 926 wchar_t wdst[8]; | |
| 927 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst))); | |
| 928 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); | |
| 929 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst))); | |
| 930 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); | |
| 931 } | |
| 932 | |
| 933 // Test the case were we we are one smaller, so we can't fit the null. | |
| 934 { | |
| 935 char dst[7]; | |
| 936 wchar_t wdst[7]; | |
| 937 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst))); | |
| 938 EXPECT_EQ(0, memcmp(dst, "abcdef", 7)); | |
| 939 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst))); | |
| 940 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7)); | |
| 941 } | |
| 942 | |
| 943 // Test the case were we are just too small. | |
| 944 { | |
| 945 char dst[3]; | |
| 946 wchar_t wdst[3]; | |
| 947 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst))); | |
| 948 EXPECT_EQ(0, memcmp(dst, "ab", 3)); | |
| 949 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst))); | |
| 950 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3)); | |
| 951 } | |
| 952 } | |
| 953 | |
| 954 TEST(StringUtilTest, WprintfFormatPortabilityTest) { | |
| 955 static const struct { | |
| 956 const wchar_t* input; | |
| 957 bool portable; | |
| 958 } cases[] = { | |
| 959 { L"%ls", true }, | |
| 960 { L"%s", false }, | |
| 961 { L"%S", false }, | |
| 962 { L"%lS", false }, | |
| 963 { L"Hello, %s", false }, | |
| 964 { L"%lc", true }, | |
| 965 { L"%c", false }, | |
| 966 { L"%C", false }, | |
| 967 { L"%lC", false }, | |
| 968 { L"%ls %s", false }, | |
| 969 { L"%s %ls", false }, | |
| 970 { L"%s %ls %s", false }, | |
| 971 { L"%f", true }, | |
| 972 { L"%f %F", false }, | |
| 973 { L"%d %D", false }, | |
| 974 { L"%o %O", false }, | |
| 975 { L"%u %U", false }, | |
| 976 { L"%f %d %o %u", true }, | |
| 977 { L"%-8d (%02.1f%)", true }, | |
| 978 { L"% 10s", false }, | |
| 979 { L"% 10ls", true } | |
| 980 }; | |
| 981 for (size_t i = 0; i < arraysize(cases); ++i) | |
| 982 EXPECT_EQ(cases[i].portable, IsWprintfFormatPortable(cases[i].input)); | |
| 983 } | |
| 984 | |
| 985 TEST(StringUtilTest, RemoveChars) { | |
| 986 const char kRemoveChars[] = "-/+*"; | |
| 987 std::string input = "A-+bc/d!*"; | |
| 988 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input)); | |
| 989 EXPECT_EQ("Abcd!", input); | |
| 990 | |
| 991 // No characters match kRemoveChars. | |
| 992 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); | |
| 993 EXPECT_EQ("Abcd!", input); | |
| 994 | |
| 995 // Empty string. | |
| 996 input.clear(); | |
| 997 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); | |
| 998 EXPECT_EQ(std::string(), input); | |
| 999 } | |
| 1000 | |
| 1001 TEST(StringUtilTest, ReplaceChars) { | |
| 1002 struct TestData { | |
| 1003 const char* input; | |
| 1004 const char* replace_chars; | |
| 1005 const char* replace_with; | |
| 1006 const char* output; | |
| 1007 bool result; | |
| 1008 } cases[] = { | |
| 1009 { "", "", "", "", false }, | |
| 1010 { "test", "", "", "test", false }, | |
| 1011 { "test", "", "!", "test", false }, | |
| 1012 { "test", "z", "!", "test", false }, | |
| 1013 { "test", "e", "!", "t!st", true }, | |
| 1014 { "test", "e", "!?", "t!?st", true }, | |
| 1015 { "test", "ez", "!", "t!st", true }, | |
| 1016 { "test", "zed", "!?", "t!?st", true }, | |
| 1017 { "test", "t", "!?", "!?es!?", true }, | |
| 1018 { "test", "et", "!>", "!>!>s!>", true }, | |
| 1019 { "test", "zest", "!", "!!!!", true }, | |
| 1020 { "test", "szt", "!", "!e!!", true }, | |
| 1021 { "test", "t", "test", "testestest", true }, | |
| 1022 }; | |
| 1023 | |
| 1024 for (size_t i = 0; i < arraysize(cases); ++i) { | |
| 1025 std::string output; | |
| 1026 bool result = ReplaceChars(cases[i].input, | |
| 1027 cases[i].replace_chars, | |
| 1028 cases[i].replace_with, | |
| 1029 &output); | |
| 1030 EXPECT_EQ(cases[i].result, result); | |
| 1031 EXPECT_EQ(cases[i].output, output); | |
| 1032 } | |
| 1033 } | |
| 1034 | |
| 1035 TEST(StringUtilTest, ContainsOnlyChars) { | |
| 1036 // Providing an empty list of characters should return false but for the empty | |
| 1037 // string. | |
| 1038 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string())); | |
| 1039 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string())); | |
| 1040 | |
| 1041 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234")); | |
| 1042 EXPECT_TRUE(ContainsOnlyChars("1", "1234")); | |
| 1043 EXPECT_TRUE(ContainsOnlyChars("1", "4321")); | |
| 1044 EXPECT_TRUE(ContainsOnlyChars("123", "4321")); | |
| 1045 EXPECT_FALSE(ContainsOnlyChars("123a", "4321")); | |
| 1046 | |
| 1047 EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII)); | |
| 1048 EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII)); | |
| 1049 EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII)); | |
| 1050 EXPECT_TRUE(ContainsOnlyChars("\t \r \n ", kWhitespaceASCII)); | |
| 1051 EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII)); | |
| 1052 EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII)); | |
| 1053 | |
| 1054 EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16)); | |
| 1055 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16)); | |
| 1056 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16)); | |
| 1057 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n "), kWhitespaceUTF16)); | |
| 1058 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16)); | |
| 1059 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n "), | |
| 1060 kWhitespaceUTF16)); | |
| 1061 } | |
| 1062 | |
| 1063 TEST(StringUtilTest, CompareCaseInsensitiveASCII) { | |
| 1064 EXPECT_EQ(0, CompareCaseInsensitiveASCII("", "")); | |
| 1065 EXPECT_EQ(0, CompareCaseInsensitiveASCII("Asdf", "aSDf")); | |
| 1066 | |
| 1067 // Differing lengths. | |
| 1068 EXPECT_EQ(-1, CompareCaseInsensitiveASCII("Asdf", "aSDfA")); | |
| 1069 EXPECT_EQ(1, CompareCaseInsensitiveASCII("AsdfA", "aSDf")); | |
| 1070 | |
| 1071 // Differing values. | |
| 1072 EXPECT_EQ(-1, CompareCaseInsensitiveASCII("AsdfA", "aSDfb")); | |
| 1073 EXPECT_EQ(1, CompareCaseInsensitiveASCII("Asdfb", "aSDfA")); | |
| 1074 } | |
| 1075 | |
| 1076 TEST(StringUtilTest, EqualsCaseInsensitiveASCII) { | |
| 1077 EXPECT_TRUE(EqualsCaseInsensitiveASCII("", "")); | |
| 1078 EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", "aSDF")); | |
| 1079 EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", "aSDF")); | |
| 1080 EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", "aSDFz")); | |
| 1081 } | |
| 1082 | |
| 1083 class WriteIntoTest : public testing::Test { | |
| 1084 protected: | |
| 1085 static void WritesCorrectly(size_t num_chars) { | |
| 1086 std::string buffer; | |
| 1087 char kOriginal[] = "supercali"; | |
| 1088 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars); | |
| 1089 // Using std::string(buffer.c_str()) instead of |buffer| truncates the | |
| 1090 // string at the first \0. | |
| 1091 EXPECT_EQ(std::string(kOriginal, | |
| 1092 std::min(num_chars, arraysize(kOriginal) - 1)), | |
| 1093 std::string(buffer.c_str())); | |
| 1094 EXPECT_EQ(num_chars, buffer.size()); | |
| 1095 } | |
| 1096 }; | |
| 1097 | |
| 1098 TEST_F(WriteIntoTest, WriteInto) { | |
| 1099 // Validate that WriteInto reserves enough space and | |
| 1100 // sizes a string correctly. | |
| 1101 WritesCorrectly(1); | |
| 1102 WritesCorrectly(2); | |
| 1103 WritesCorrectly(5000); | |
| 1104 | |
| 1105 // Validate that WriteInto doesn't modify other strings | |
| 1106 // when using a Copy-on-Write implementation. | |
| 1107 const char kLive[] = "live"; | |
| 1108 const char kDead[] = "dead"; | |
| 1109 const std::string live = kLive; | |
| 1110 std::string dead = live; | |
| 1111 strncpy(WriteInto(&dead, 5), kDead, 4); | |
| 1112 EXPECT_EQ(kDead, dead); | |
| 1113 EXPECT_EQ(4u, dead.size()); | |
| 1114 EXPECT_EQ(kLive, live); | |
| 1115 EXPECT_EQ(4u, live.size()); | |
| 1116 } | |
| 1117 | |
| 1118 } // namespace base | |
| OLD | NEW |