Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(30)

Side by Side Diff: base/strings/string_util_unittest.cc

Issue 543043002: Implement fast path in UTF8ToUTF16 for pure ASCII strings (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebased Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/strings/string_util.cc ('k') | base/strings/utf_string_conversions.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/strings/string_util.h" 5 #include "base/strings/string_util.h"
6 6
7 #include <math.h> 7 #include <math.h>
8 #include <stdarg.h> 8 #include <stdarg.h>
9 9
10 #include <algorithm> 10 #include <algorithm>
(...skipping 368 matching lines...) Expand 10 before | Expand all | Expand 10 after
379 // Check that we support Embedded Nulls. The first uses the canonical UTF-8 379 // Check that we support Embedded Nulls. The first uses the canonical UTF-8
380 // representation, and the second uses a 2-byte sequence. The second version 380 // representation, and the second uses a 2-byte sequence. The second version
381 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a 381 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
382 // given codepoint must be used. 382 // given codepoint must be used.
383 static const char kEmbeddedNull[] = "embedded\0null"; 383 static const char kEmbeddedNull[] = "embedded\0null";
384 EXPECT_TRUE(IsStringUTF8( 384 EXPECT_TRUE(IsStringUTF8(
385 std::string(kEmbeddedNull, sizeof(kEmbeddedNull)))); 385 std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
386 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000")); 386 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
387 } 387 }
388 388
389 TEST(StringUtilTest, IsStringASCII) {
390 static char char_ascii[] =
391 "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
392 static char16 char16_ascii[] = {
393 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
394 'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
395 '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
396
397 // Test a variety of the fragment start positions and lengths in order to make
398 // sure that bit masking in IsStringASCII works correctly.
399 // Also, test that a non-ASCII character will be detected regardless of its
400 // position inside the string.
401 {
402 const size_t string_length = arraysize(char_ascii) - 1;
403 for (size_t offset = 0; offset < 8; ++offset) {
404 for (size_t len = 0, max_len = string_length - offset; len < max_len;
405 ++len) {
406 EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
407 for (size_t char_pos = offset; char_pos < len; ++char_pos) {
408 char_ascii[char_pos] |= '\x80';
409 EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
410 char_ascii[char_pos] &= ~'\x80';
411 }
412 }
413 }
414 }
415
416 {
417 const size_t string_length = arraysize(char16_ascii) - 1;
418 for (size_t offset = 0; offset < 4; ++offset) {
419 for (size_t len = 0, max_len = string_length - offset; len < max_len;
420 ++len) {
421 EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
422 for (size_t char_pos = offset; char_pos < len; ++char_pos) {
423 char16_ascii[char_pos] |= 0x80;
424 EXPECT_FALSE(
425 IsStringASCII(StringPiece16(char16_ascii + offset, len)));
426 char16_ascii[char_pos] &= ~0x80;
427 // Also test when the upper half is non-zero.
428 char16_ascii[char_pos] |= 0x100;
429 EXPECT_FALSE(
430 IsStringASCII(StringPiece16(char16_ascii + offset, len)));
431 char16_ascii[char_pos] &= ~0x100;
432 }
433 }
434 }
435 }
436 }
437
389 TEST(StringUtilTest, ConvertASCII) { 438 TEST(StringUtilTest, ConvertASCII) {
390 static const char* char_cases[] = { 439 static const char* char_cases[] = {
391 "Google Video", 440 "Google Video",
392 "Hello, world\n", 441 "Hello, world\n",
393 "0123ABCDwxyz \a\b\t\r\n!+,.~" 442 "0123ABCDwxyz \a\b\t\r\n!+,.~"
394 }; 443 };
395 444
396 static const wchar_t* const wchar_cases[] = { 445 static const wchar_t* const wchar_cases[] = {
397 L"Google Video", 446 L"Google Video",
398 L"Hello, world\n", 447 L"Hello, world\n",
(...skipping 786 matching lines...) Expand 10 before | Expand all | Expand 10 after
1185 const std::string live = kLive; 1234 const std::string live = kLive;
1186 std::string dead = live; 1235 std::string dead = live;
1187 strncpy(WriteInto(&dead, 5), kDead, 4); 1236 strncpy(WriteInto(&dead, 5), kDead, 4);
1188 EXPECT_EQ(kDead, dead); 1237 EXPECT_EQ(kDead, dead);
1189 EXPECT_EQ(4u, dead.size()); 1238 EXPECT_EQ(4u, dead.size());
1190 EXPECT_EQ(kLive, live); 1239 EXPECT_EQ(kLive, live);
1191 EXPECT_EQ(4u, live.size()); 1240 EXPECT_EQ(4u, live.size());
1192 } 1241 }
1193 1242
1194 } // namespace base 1243 } // namespace base
OLDNEW
« no previous file with comments | « base/strings/string_util.cc ('k') | base/strings/utf_string_conversions.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698