base/strings/string_util_unittest.cc - Issue 543043002: Implement fast path in UTF8ToUTF16 for pure ASCII strings

Side by Side Diff: base/strings/string_util_unittest.cc

Issue 543043002: Implement fast path in UTF8ToUTF16 for pure ASCII strings (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Rebased Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "base/strings/string_util.h"	5 #include "base/strings/string_util.h"

6	6

7 #include <math.h>	7 #include <math.h>

8 #include <stdarg.h>	8 #include <stdarg.h>

9	9

10 #include <algorithm>	10 #include <algorithm>

(...skipping 368 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
379 // Check that we support Embedded Nulls. The first uses the canonical UTF-8	379 // Check that we support Embedded Nulls. The first uses the canonical UTF-8

380 // representation, and the second uses a 2-byte sequence. The second version	380 // representation, and the second uses a 2-byte sequence. The second version

381 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a	381 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a

382 // given codepoint must be used.	382 // given codepoint must be used.

383 static const char kEmbeddedNull[] = "embedded\0null";	383 static const char kEmbeddedNull[] = "embedded\0null";

384 EXPECT_TRUE(IsStringUTF8(	384 EXPECT_TRUE(IsStringUTF8(

385 std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));	385 std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));

386 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));	386 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));

387 }	387 }

388	388

	389 TEST(StringUtilTest, IsStringASCII) {

	390 static char char_ascii[] =

	391 "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";

	392 static char16 char16_ascii[] = {

	393 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',

	394 'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',

	395 '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };

	396

	397 // Test a variety of the fragment start positions and lengths in order to make

	398 // sure that bit masking in IsStringASCII works correctly.

	399 // Also, test that a non-ASCII character will be detected regardless of its

	400 // position inside the string.

	401 {

	402 const size_t string_length = arraysize(char_ascii) - 1;

	403 for (size_t offset = 0; offset < 8; ++offset) {

	404 for (size_t len = 0, max_len = string_length - offset; len < max_len;

	405 ++len) {

	406 EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));

	407 for (size_t char_pos = offset; char_pos < len; ++char_pos) {

	408 char_ascii[char_pos] |= '\x80';

	409 EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));

	410 char_ascii[char_pos] &= ~'\x80';

	411 }

	412 }

	413 }

	414 }

	415

	416 {

	417 const size_t string_length = arraysize(char16_ascii) - 1;

	418 for (size_t offset = 0; offset < 4; ++offset) {

	419 for (size_t len = 0, max_len = string_length - offset; len < max_len;

	420 ++len) {

	421 EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));

	422 for (size_t char_pos = offset; char_pos < len; ++char_pos) {

	423 char16_ascii[char_pos] |= 0x80;

	424 EXPECT_FALSE(

	425 IsStringASCII(StringPiece16(char16_ascii + offset, len)));

	426 char16_ascii[char_pos] &= ~0x80;

	427 // Also test when the upper half is non-zero.

	428 char16_ascii[char_pos] |= 0x100;

	429 EXPECT_FALSE(

	430 IsStringASCII(StringPiece16(char16_ascii + offset, len)));

	431 char16_ascii[char_pos] &= ~0x100;

	432 }

	433 }

	434 }

	435 }

	436 }

	437

389 TEST(StringUtilTest, ConvertASCII) {	438 TEST(StringUtilTest, ConvertASCII) {

390 static const char* char_cases[] = {	439 static const char* char_cases[] = {

391 "Google Video",	440 "Google Video",

392 "Hello, world\n",	441 "Hello, world\n",

393 "0123ABCDwxyz \a\b\t\r\n!+,.~"	442 "0123ABCDwxyz \a\b\t\r\n!+,.~"

394 };	443 };

395	444

396 static const wchar_t* const wchar_cases[] = {	445 static const wchar_t* const wchar_cases[] = {

397 L"Google Video",	446 L"Google Video",

398 L"Hello, world\n",	447 L"Hello, world\n",

(...skipping 786 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1185 const std::string live = kLive;	1234 const std::string live = kLive;

1186 std::string dead = live;	1235 std::string dead = live;

1187 strncpy(WriteInto(&dead, 5), kDead, 4);	1236 strncpy(WriteInto(&dead, 5), kDead, 4);

1188 EXPECT_EQ(kDead, dead);	1237 EXPECT_EQ(kDead, dead);

1189 EXPECT_EQ(4u, dead.size());	1238 EXPECT_EQ(4u, dead.size());

1190 EXPECT_EQ(kLive, live);	1239 EXPECT_EQ(kLive, live);

1191 EXPECT_EQ(4u, live.size());	1240 EXPECT_EQ(4u, live.size());

1192 }	1241 }

1193	1242

1194 } // namespace base	1243 } // namespace base

OLD	NEW

« no previous file with comments | « base/strings/string_util.cc ('k') | base/strings/utf_string_conversions.cc » ('j') | no next file with comments »