base/strings/string_util_unittest.cc - Issue 1647803004: Move base to DEPS

Side by Side Diff: base/strings/string_util_unittest.cc

Issue 1647803004: Move base to DEPS (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "base/strings/string_util.h"

6

7 #include <math.h>

8 #include <stdarg.h>

9

10 #include <algorithm>

11

12 #include "base/basictypes.h"

13 #include "base/strings/string16.h"

14 #include "base/strings/utf_string_conversions.h"

15 #include "testing/gmock/include/gmock/gmock.h"

16 #include "testing/gtest/include/gtest/gtest.h"

17

18 using ::testing::ElementsAre;

19

20 namespace base {

21

22 static const struct trim_case {

23 const wchar_t* input;

24 const TrimPositions positions;

25 const wchar_t* output;

26 const TrimPositions return_value;

27 } trim_cases[] = {

28 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},

29 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},

30 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},

31 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},

32 {L"", TRIM_ALL, L"", TRIM_NONE},

33 {L" ", TRIM_LEADING, L"", TRIM_LEADING},

34 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},

35 {L" ", TRIM_ALL, L"", TRIM_ALL},

36 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},

37 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},

38 };

39

40 static const struct trim_case_ascii {

41 const char* input;

42 const TrimPositions positions;

43 const char* output;

44 const TrimPositions return_value;

45 } trim_cases_ascii[] = {

46 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},

47 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},

48 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},

49 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},

50 {"", TRIM_ALL, "", TRIM_NONE},

51 {" ", TRIM_LEADING, "", TRIM_LEADING},

52 {" ", TRIM_TRAILING, "", TRIM_TRAILING},

53 {" ", TRIM_ALL, "", TRIM_ALL},

54 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},

55 };

56

57 namespace {

58

59 // Helper used to test TruncateUTF8ToByteSize.

60 bool Truncated(const std::string& input,

61 const size_t byte_size,

62 std::string* output) {

63 size_t prev = input.length();

64 TruncateUTF8ToByteSize(input, byte_size, output);

65 return prev != output->length();

66 }

67

68 } // namespace

69

70 TEST(StringUtilTest, TruncateUTF8ToByteSize) {

71 std::string output;

72

73 // Empty strings and invalid byte_size arguments

74 EXPECT_FALSE(Truncated(std::string(), 0, &output));

75 EXPECT_EQ(output, "");

76 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));

77 EXPECT_EQ(output, "");

78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));

79 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));

80

81 // Testing the truncation of valid UTF8 correctly

82 EXPECT_TRUE(Truncated("abc", 2, &output));

83 EXPECT_EQ(output, "ab");

84 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));

85 EXPECT_EQ(output.compare("\xc2\x81"), 0);

86 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));

87 EXPECT_EQ(output.compare("\xc2\x81"), 0);

88 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));

89 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);

90

91 {

92 const char array[] = "\x00\x00\xc2\x81\xc2\x81";

93 const std::string array_string(array, arraysize(array));

94 EXPECT_TRUE(Truncated(array_string, 4, &output));

95 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);

96 }

97

98 {

99 const char array[] = "\x00\xc2\x81\xc2\x81";

100 const std::string array_string(array, arraysize(array));

101 EXPECT_TRUE(Truncated(array_string, 4, &output));

102 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);

103 }

104

105 // Testing invalid UTF8

106 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));

107 EXPECT_EQ(output.compare(""), 0);

108 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));

109 EXPECT_EQ(output.compare(""), 0);

110 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));

111 EXPECT_EQ(output.compare(""), 0);

112

113 // Testing invalid UTF8 mixed with valid UTF8

114 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));

115 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);

116 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));

117 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);

118 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",

119 10, &output));

120 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);

121 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",

122 10, &output));

123 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);

124 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));

125 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);

126

127 // Overlong sequences

128 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));

129 EXPECT_EQ(output.compare(""), 0);

130 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));

131 EXPECT_EQ(output.compare(""), 0);

132 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));

133 EXPECT_EQ(output.compare(""), 0);

134 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));

135 EXPECT_EQ(output.compare(""), 0);

136 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));

137 EXPECT_EQ(output.compare(""), 0);

138 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));

139 EXPECT_EQ(output.compare(""), 0);

140 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));

141 EXPECT_EQ(output.compare(""), 0);

142 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));

143 EXPECT_EQ(output.compare(""), 0);

144 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));

145 EXPECT_EQ(output.compare(""), 0);

146 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));

147 EXPECT_EQ(output.compare(""), 0);

148 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));

149 EXPECT_EQ(output.compare(""), 0);

150

151 // Beyond U+10FFFF (the upper limit of Unicode codespace)

152 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));

153 EXPECT_EQ(output.compare(""), 0);

154 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));

155 EXPECT_EQ(output.compare(""), 0);

156 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));

157 EXPECT_EQ(output.compare(""), 0);

158

159 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)

160 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));

161 EXPECT_EQ(output.compare(""), 0);

162 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));

163 EXPECT_EQ(output.compare(""), 0);

164

165 {

166 const char array[] = "\x00\x00\xfe\xff";

167 const std::string array_string(array, arraysize(array));

168 EXPECT_TRUE(Truncated(array_string, 4, &output));

169 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);

170 }

171

172 // Variants on the previous test

173 {

174 const char array[] = "\xff\xfe\x00\x00";

175 const std::string array_string(array, 4);

176 EXPECT_FALSE(Truncated(array_string, 4, &output));

177 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);

178 }

179 {

180 const char array[] = "\xff\x00\x00\xfe";

181 const std::string array_string(array, arraysize(array));

182 EXPECT_TRUE(Truncated(array_string, 4, &output));

183 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);

184 }

185

186 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>

187 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));

188 EXPECT_EQ(output.compare(""), 0);

189 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));

190 EXPECT_EQ(output.compare(""), 0);

191 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));

192 EXPECT_EQ(output.compare(""), 0);

193 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));

194 EXPECT_EQ(output.compare(""), 0);

195 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));

196 EXPECT_EQ(output.compare(""), 0);

197

198 // Strings in legacy encodings that are valid in UTF-8, but

199 // are invalid as UTF-8 in real data.

200 EXPECT_TRUE(Truncated("caf\xe9", 4, &output));

201 EXPECT_EQ(output.compare("caf"), 0);

202 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));

203 EXPECT_EQ(output.compare(""), 0);

204 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));

205 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);

206 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,

207 &output));

208 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);

209

210 // Testing using the same string as input and output.

211 EXPECT_FALSE(Truncated(output, 4, &output));

212 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);

213 EXPECT_TRUE(Truncated(output, 3, &output));

214 EXPECT_EQ(output.compare("\xa7\x41"), 0);

215

216 // "abc" with U+201[CD] in windows-125[0-8]

217 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));

218 EXPECT_EQ(output.compare("\x93" "abc"), 0);

219

220 // U+0639 U+064E U+0644 U+064E in ISO-8859-6

221 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));

222 EXPECT_EQ(output.compare(""), 0);

223

224 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7

225 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));

226 EXPECT_EQ(output.compare(""), 0);

227 }

228

229 TEST(StringUtilTest, TrimWhitespace) {

230 string16 output; // Allow contents to carry over to next testcase

231 for (size_t i = 0; i < arraysize(trim_cases); ++i) {

232 const trim_case& value = trim_cases[i];

233 EXPECT_EQ(value.return_value,

234 TrimWhitespace(WideToUTF16(value.input), value.positions,

235 &output));

236 EXPECT_EQ(WideToUTF16(value.output), output);

237 }

238

239 // Test that TrimWhitespace() can take the same string for input and output

240 output = ASCIIToUTF16(" This is a test \r\n");

241 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));

242 EXPECT_EQ(ASCIIToUTF16("This is a test"), output);

243

244 // Once more, but with a string of whitespace

245 output = ASCIIToUTF16(" \r\n");

246 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));

247 EXPECT_EQ(string16(), output);

248

249 std::string output_ascii;

250 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {

251 const trim_case_ascii& value = trim_cases_ascii[i];

252 EXPECT_EQ(value.return_value,

253 TrimWhitespace(value.input, value.positions, &output_ascii));

254 EXPECT_EQ(value.output, output_ascii);

255 }

256 }

257

258 static const struct collapse_case {

259 const wchar_t* input;

260 const bool trim;

261 const wchar_t* output;

262 } collapse_cases[] = {

263 {L" Google Video ", false, L"Google Video"},

264 {L"Google Video", false, L"Google Video"},

265 {L"", false, L""},

266 {L" ", false, L""},

267 {L"\t\rTest String\n", false, L"Test String"},

268 {L"\x2002Test String\x00A0\x3000", false, L"Test String"},

269 {L" Test \n \t String ", false, L"Test String"},

270 {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},

271 {L" Test String", false, L"Test String"},

272 {L"Test String ", false, L"Test String"},

273 {L"Test String", false, L"Test String"},

274 {L"", true, L""},

275 {L"\n", true, L""},

276 {L" \r ", true, L""},

277 {L"\nFoo", true, L"Foo"},

278 {L"\r Foo ", true, L"Foo"},

279 {L" Foo bar ", true, L"Foo bar"},

280 {L" \tFoo bar \n", true, L"Foo bar"},

281 {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},

282 };

283

284 TEST(StringUtilTest, CollapseWhitespace) {

285 for (size_t i = 0; i < arraysize(collapse_cases); ++i) {

286 const collapse_case& value = collapse_cases[i];

287 EXPECT_EQ(WideToUTF16(value.output),

288 CollapseWhitespace(WideToUTF16(value.input), value.trim));

289 }

290 }

291

292 static const struct collapse_case_ascii {

293 const char* input;

294 const bool trim;

295 const char* output;

296 } collapse_cases_ascii[] = {

297 {" Google Video ", false, "Google Video"},

298 {"Google Video", false, "Google Video"},

299 {"", false, ""},

300 {" ", false, ""},

301 {"\t\rTest String\n", false, "Test String"},

302 {" Test \n \t String ", false, "Test String"},

303 {" Test String", false, "Test String"},

304 {"Test String ", false, "Test String"},

305 {"Test String", false, "Test String"},

306 {"", true, ""},

307 {"\n", true, ""},

308 {" \r ", true, ""},

309 {"\nFoo", true, "Foo"},

310 {"\r Foo ", true, "Foo"},

311 {" Foo bar ", true, "Foo bar"},

312 {" \tFoo bar \n", true, "Foo bar"},

313 {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},

314 };

315

316 TEST(StringUtilTest, CollapseWhitespaceASCII) {

317 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {

318 const collapse_case_ascii& value = collapse_cases_ascii[i];

319 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));

320 }

321 }

322

323 TEST(StringUtilTest, IsStringUTF8) {

324 EXPECT_TRUE(IsStringUTF8("abc"));

325 EXPECT_TRUE(IsStringUTF8("\xc2\x81"));

326 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));

327 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));

328 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));

329 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM

330

331 // surrogate code points

332 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));

333 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));

334 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));

335

336 // overlong sequences

337 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000

338 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"

339 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000

340 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080

341 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff

342 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D

343 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091

344 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800

345 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)

346 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F

347 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5

348

349 // Beyond U+10FFFF (the upper limit of Unicode codespace)

350 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000

351 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes

352 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes

353

354 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)

355 EXPECT_FALSE(IsStringUTF8("\xfe\xff"));

356 EXPECT_FALSE(IsStringUTF8("\xff\xfe"));

357 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));

358 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));

359

360 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>

361 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE

362 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // overlong U+FFFE (U+1FFFE would be \xf0\x9f\xbf\xbe)

363 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+FFFFF (U+10FFFF would be \xf4\x8f\xbf\xbf)

364 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0

365 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF

366 // Strings in legacy encodings. We can certainly make up strings

367 // in a legacy encoding that are valid in UTF-8, but in real data,

368 // most of them are invalid as UTF-8.

369 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1

370 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC01 in EUC-KR

371 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5

372 // "abc" with U+201[CD] in windows-125[0-8]

373 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));

374 // U+0639 U+064E U+0644 U+064E in ISO-8859-6

375 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));

376 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7

377 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));

378

379 // Check that we support Embedded Nulls. The first uses the canonical UTF-8

380 // representation, and the second uses a 2-byte sequence. The second version

381 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a

382 // given codepoint must be used.

383 static const char kEmbeddedNull[] = "embedded\0null";

384 EXPECT_TRUE(IsStringUTF8(

385 std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));

386 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));

387 }

388

389 TEST(StringUtilTest, IsStringASCII) {

390 static char char_ascii[] =

391 "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";

392 static char16 char16_ascii[] = {

393 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',

394 'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',

395 '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };

396 static std::wstring wchar_ascii(

397 L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");

398

399 // Test a variety of the fragment start positions and lengths in order to make

400 // sure that bit masking in IsStringASCII works correctly.

401 // Also, test that a non-ASCII character will be detected regardless of its

402 // position inside the string.

403 {

404 const size_t string_length = arraysize(char_ascii) - 1;

405 for (size_t offset = 0; offset < 8; ++offset) {

406 for (size_t len = 0, max_len = string_length - offset; len < max_len;

407 ++len) {

408 EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));

409 for (size_t char_pos = offset; char_pos < len; ++char_pos) {

410 char_ascii[char_pos] |= '\x80';

411 EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));

412 char_ascii[char_pos] &= ~'\x80';

413 }

414 }

415 }

416 }

417

418 {

419 const size_t string_length = arraysize(char16_ascii) - 1;

420 for (size_t offset = 0; offset < 4; ++offset) {

421 for (size_t len = 0, max_len = string_length - offset; len < max_len;

422 ++len) {

423 EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));

424 for (size_t char_pos = offset; char_pos < len; ++char_pos) {

425 char16_ascii[char_pos] |= 0x80;

426 EXPECT_FALSE(

427 IsStringASCII(StringPiece16(char16_ascii + offset, len)));

428 char16_ascii[char_pos] &= ~0x80;

429 // Also test when the upper half is non-zero.

430 char16_ascii[char_pos] |= 0x100;

431 EXPECT_FALSE(

432 IsStringASCII(StringPiece16(char16_ascii + offset, len)));

433 char16_ascii[char_pos] &= ~0x100;

434 }

435 }

436 }

437 }

438

439 {

440 const size_t string_length = wchar_ascii.length();

441 for (size_t len = 0; len < string_length; ++len) {

442 EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));

443 for (size_t char_pos = 0; char_pos < len; ++char_pos) {

444 wchar_ascii[char_pos] |= 0x80;

445 EXPECT_FALSE(

446 IsStringASCII(wchar_ascii.substr(0, len)));

447 wchar_ascii[char_pos] &= ~0x80;

448 wchar_ascii[char_pos] |= 0x100;

449 EXPECT_FALSE(

450 IsStringASCII(wchar_ascii.substr(0, len)));

451 wchar_ascii[char_pos] &= ~0x100;

452 #if defined(WCHAR_T_IS_UTF32)

453 wchar_ascii[char_pos] |= 0x10000;

454 EXPECT_FALSE(

455 IsStringASCII(wchar_ascii.substr(0, len)));

456 wchar_ascii[char_pos] &= ~0x10000;

457 #endif // WCHAR_T_IS_UTF32

458 }

459 }

460 }

461 }

462

463 TEST(StringUtilTest, ConvertASCII) {

464 static const char* const char_cases[] = {

465 "Google Video",

466 "Hello, world\n",

467 "0123ABCDwxyz \a\b\t\r\n!+,.~"

468 };

469

470 static const wchar_t* const wchar_cases[] = {

471 L"Google Video",

472 L"Hello, world\n",

473 L"0123ABCDwxyz \a\b\t\r\n!+,.~"

474 };

475

476 for (size_t i = 0; i < arraysize(char_cases); ++i) {

477 EXPECT_TRUE(IsStringASCII(char_cases[i]));

478 string16 utf16 = ASCIIToUTF16(char_cases[i]);

479 EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);

480

481 std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));

482 EXPECT_EQ(char_cases[i], ascii);

483 }

484

485 EXPECT_FALSE(IsStringASCII("Google \x80Video"));

486

487 // Convert empty strings.

488 string16 empty16;

489 std::string empty;

490 EXPECT_EQ(empty, UTF16ToASCII(empty16));

491 EXPECT_EQ(empty16, ASCIIToUTF16(empty));

492

493 // Convert strings with an embedded NUL character.

494 const char chars_with_nul[] = "test\0string";

495 const int length_with_nul = arraysize(chars_with_nul) - 1;

496 std::string string_with_nul(chars_with_nul, length_with_nul);

497 string16 string16_with_nul = ASCIIToUTF16(string_with_nul);

498 EXPECT_EQ(static_cast<string16::size_type>(length_with_nul),

499 string16_with_nul.length());

500 std::string narrow_with_nul = UTF16ToASCII(string16_with_nul);

501 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),

502 narrow_with_nul.length());

503 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));

504 }

505

506 TEST(StringUtilTest, ToUpperASCII) {

507 EXPECT_EQ('C', ToUpperASCII('C'));

508 EXPECT_EQ('C', ToUpperASCII('c'));

509 EXPECT_EQ('2', ToUpperASCII('2'));

510

511 EXPECT_EQ(L'C', ToUpperASCII(L'C'));

512 EXPECT_EQ(L'C', ToUpperASCII(L'c'));

513 EXPECT_EQ(L'2', ToUpperASCII(L'2'));

514

515 std::string in_place_a("Cc2");

516 StringToUpperASCII(&in_place_a);

517 EXPECT_EQ("CC2", in_place_a);

518

519 std::wstring in_place_w(L"Cc2");

520 StringToUpperASCII(&in_place_w);

521 EXPECT_EQ(L"CC2", in_place_w);

522

523 std::string original_a("Cc2");

524 std::string upper_a = StringToUpperASCII(original_a);

525 EXPECT_EQ("CC2", upper_a);

526

527 std::wstring original_w(L"Cc2");

528 std::wstring upper_w = StringToUpperASCII(original_w);

529 EXPECT_EQ(L"CC2", upper_w);

530 }

531

532 TEST(StringUtilTest, LowerCaseEqualsASCII) {

533 static const struct {

534 const char* src_a;

535 const char* dst;

536 } lowercase_cases[] = {

537 { "FoO", "foo" },

538 { "foo", "foo" },

539 { "FOO", "foo" },

540 };

541

542 for (size_t i = 0; i < arraysize(lowercase_cases); ++i) {

543 EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a),

544 lowercase_cases[i].dst));

545 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,

546 lowercase_cases[i].dst));

547 }

548 }

549

550 TEST(StringUtilTest, FormatBytesUnlocalized) {

551 static const struct {

552 int64 bytes;

553 const char* expected;

554 } cases[] = {

555 // Expected behavior: we show one post-decimal digit when we have

556 // under two pre-decimal digits, except in cases where it makes no

557 // sense (zero or bytes).

558 // Since we switch units once we cross the 1000 mark, this keeps

559 // the display of file sizes or bytes consistently around three

560 // digits.

561 {0, "0 B"},

562 {512, "512 B"},

563 {1024*1024, "1.0 MB"},

564 {1024*1024*1024, "1.0 GB"},

565 {10LL*1024*1024*1024, "10.0 GB"},

566 {99LL*1024*1024*1024, "99.0 GB"},

567 {105LL*1024*1024*1024, "105 GB"},

568 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},

569 {~(1LL << 63), "8192 PB"},

570

571 {99*1024 + 103, "99.1 kB"},

572 {1024*1024 + 103, "1.0 MB"},

573 {1024*1024 + 205 * 1024, "1.2 MB"},

574 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},

575 {10LL*1024*1024*1024, "10.0 GB"},

576 {100LL*1024*1024*1024, "100 GB"},

577 };

578

579 for (size_t i = 0; i < arraysize(cases); ++i) {

580 EXPECT_EQ(ASCIIToUTF16(cases[i].expected),

581 FormatBytesUnlocalized(cases[i].bytes));

582 }

583 }

584 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {

585 static const struct {

586 const char* str;

587 string16::size_type start_offset;

588 const char* find_this;

589 const char* replace_with;

590 const char* expected;

591 } cases[] = {

592 {"aaa", 0, "a", "b", "bbb"},

593 {"abb", 0, "ab", "a", "ab"},

594 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},

595 {"Not found", 0, "x", "0", "Not found"},

596 {"Not found again", 5, "x", "0", "Not found again"},

597 {" Making it much longer ", 0, " ", "Four score and seven years ago",

598 "Four score and seven years agoMakingFour score and seven years agoit"

599 "Four score and seven years agomuchFour score and seven years agolonger"

600 "Four score and seven years ago"},

601 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},

602 {"Replace me only me once", 9, "me ", "", "Replace me only once"},

603 {"abababab", 2, "ab", "c", "abccc"},

604 };

605

606 for (size_t i = 0; i < arraysize(cases); i++) {

607 string16 str = ASCIIToUTF16(cases[i].str);

608 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,

609 ASCIIToUTF16(cases[i].find_this),

610 ASCIIToUTF16(cases[i].replace_with));

611 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);

612 }

613 }

614

615 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {

616 static const struct {

617 const char* str;

618 string16::size_type start_offset;

619 const char* find_this;

620 const char* replace_with;

621 const char* expected;

622 } cases[] = {

623 {"aaa", 0, "a", "b", "baa"},

624 {"abb", 0, "ab", "a", "ab"},

625 {"Removing some substrings inging", 0, "ing", "",

626 "Remov some substrings inging"},

627 {"Not found", 0, "x", "0", "Not found"},

628 {"Not found again", 5, "x", "0", "Not found again"},

629 {" Making it much longer ", 0, " ", "Four score and seven years ago",

630 "Four score and seven years agoMaking it much longer "},

631 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},

632 {"Replace me only me once", 4, "me ", "", "Replace only me once"},

633 {"abababab", 2, "ab", "c", "abcabab"},

634 };

635

636 for (size_t i = 0; i < arraysize(cases); i++) {

637 string16 str = ASCIIToUTF16(cases[i].str);

638 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,

639 ASCIIToUTF16(cases[i].find_this),

640 ASCIIToUTF16(cases[i].replace_with));

641 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);

642 }

643 }

644

645 TEST(StringUtilTest, HexDigitToInt) {

646 EXPECT_EQ(0, HexDigitToInt('0'));

647 EXPECT_EQ(1, HexDigitToInt('1'));

648 EXPECT_EQ(2, HexDigitToInt('2'));

649 EXPECT_EQ(3, HexDigitToInt('3'));

650 EXPECT_EQ(4, HexDigitToInt('4'));

651 EXPECT_EQ(5, HexDigitToInt('5'));

652 EXPECT_EQ(6, HexDigitToInt('6'));

653 EXPECT_EQ(7, HexDigitToInt('7'));

654 EXPECT_EQ(8, HexDigitToInt('8'));

655 EXPECT_EQ(9, HexDigitToInt('9'));

656 EXPECT_EQ(10, HexDigitToInt('A'));

657 EXPECT_EQ(11, HexDigitToInt('B'));

658 EXPECT_EQ(12, HexDigitToInt('C'));

659 EXPECT_EQ(13, HexDigitToInt('D'));

660 EXPECT_EQ(14, HexDigitToInt('E'));

661 EXPECT_EQ(15, HexDigitToInt('F'));

662

663 // Verify the lower case as well.

664 EXPECT_EQ(10, HexDigitToInt('a'));

665 EXPECT_EQ(11, HexDigitToInt('b'));

666 EXPECT_EQ(12, HexDigitToInt('c'));

667 EXPECT_EQ(13, HexDigitToInt('d'));

668 EXPECT_EQ(14, HexDigitToInt('e'));

669 EXPECT_EQ(15, HexDigitToInt('f'));

670 }

671

672 TEST(StringUtilTest, JoinString) {

673 std::string separator(", ");

674 std::vector<std::string> parts;

675 EXPECT_EQ(std::string(), JoinString(parts, separator));

676

677 parts.push_back("a");

678 EXPECT_EQ("a", JoinString(parts, separator));

679

680 parts.push_back("b");

681 parts.push_back("c");

682 EXPECT_EQ("a, b, c", JoinString(parts, separator));

683

684 parts.push_back(std::string());

685 EXPECT_EQ("a, b, c, ", JoinString(parts, separator));

686 parts.push_back(" ");

687 EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));

688 }

689

690 TEST(StringUtilTest, JoinString16) {

691 string16 separator = ASCIIToUTF16(", ");

692 std::vector<string16> parts;

693 EXPECT_EQ(string16(), JoinString(parts, separator));

694

695 parts.push_back(ASCIIToUTF16("a"));

696 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));

697

698 parts.push_back(ASCIIToUTF16("b"));

699 parts.push_back(ASCIIToUTF16("c"));

700 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));

701

702 parts.push_back(ASCIIToUTF16(""));

703 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));

704 parts.push_back(ASCIIToUTF16(" "));

705 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));

706 }

707

708 TEST(StringUtilTest, StartsWith) {

709 EXPECT_TRUE(

710 StartsWith("javascript:url", "javascript", base::CompareCase::SENSITIVE));

711 EXPECT_FALSE(

712 StartsWith("JavaScript:url", "javascript", base::CompareCase::SENSITIVE));

713 EXPECT_TRUE(StartsWith("javascript:url", "javascript",

714 base::CompareCase::INSENSITIVE_ASCII));

715 EXPECT_TRUE(StartsWith("JavaScript:url", "javascript",

716 base::CompareCase::INSENSITIVE_ASCII));

717 EXPECT_FALSE(StartsWith("java", "javascript", base::CompareCase::SENSITIVE));

718 EXPECT_FALSE(

719 StartsWith("java", "javascript", base::CompareCase::INSENSITIVE_ASCII));

720 EXPECT_FALSE(StartsWith(std::string(), "javascript",

721 base::CompareCase::INSENSITIVE_ASCII));

722 EXPECT_FALSE(

723 StartsWith(std::string(), "javascript", base::CompareCase::SENSITIVE));

724 EXPECT_TRUE(

725 StartsWith("java", std::string(), base::CompareCase::INSENSITIVE_ASCII));

726 EXPECT_TRUE(StartsWith("java", std::string(), base::CompareCase::SENSITIVE));

727

728 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),

729 ASCIIToUTF16("javascript"),

730 base::CompareCase::SENSITIVE));

731 EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),

732 ASCIIToUTF16("javascript"),

733 base::CompareCase::SENSITIVE));

734 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),

735 ASCIIToUTF16("javascript"),

736 base::CompareCase::INSENSITIVE_ASCII));

737 EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),

738 ASCIIToUTF16("javascript"),

739 base::CompareCase::INSENSITIVE_ASCII));

740 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), ASCIIToUTF16("javascript"),

741 base::CompareCase::SENSITIVE));

742 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), ASCIIToUTF16("javascript"),

743 base::CompareCase::INSENSITIVE_ASCII));

744 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"),

745 base::CompareCase::INSENSITIVE_ASCII));

746 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"),

747 base::CompareCase::SENSITIVE));

748 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(),

749 base::CompareCase::INSENSITIVE_ASCII));

750 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(),

751 base::CompareCase::SENSITIVE));

752 }

753

// Exercises EndsWith() on string16 arguments under both
// CompareCase::SENSITIVE and CompareCase::INSENSITIVE_ASCII.
754 TEST(StringUtilTest, EndsWith) {

// A suffix that differs from the input only in letter case is expected to
// match under INSENSITIVE_ASCII but not under SENSITIVE.
755 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), ASCIIToUTF16(".plugin"),

756 base::CompareCase::SENSITIVE));

757 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"), ASCIIToUTF16(".plugin"),

758 base::CompareCase::SENSITIVE));

759 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), ASCIIToUTF16(".plugin"),

760 base::CompareCase::INSENSITIVE_ASCII));

761 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"), ASCIIToUTF16(".plugin"),

762 base::CompareCase::INSENSITIVE_ASCII));

// The search string is longer than the input: no match expected.
763 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"),

764 base::CompareCase::SENSITIVE));

765 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"),

766 base::CompareCase::INSENSITIVE_ASCII));

// The search string occurs in the middle of the input, not at its end.
767 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), ASCIIToUTF16(".plugin"),

768 base::CompareCase::SENSITIVE));

769 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), ASCIIToUTF16(".plugin"),

770 base::CompareCase::INSENSITIVE_ASCII));

// Empty input with a non-empty search string: no match expected.
771 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"),

772 base::CompareCase::INSENSITIVE_ASCII));

773 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"),

774 base::CompareCase::SENSITIVE));

// An empty search string is expected to match a non-empty input.
775 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(),

776 base::CompareCase::INSENSITIVE_ASCII));

777 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(),

778 base::CompareCase::SENSITIVE));

// Input and search string are identical.
779 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"),

780 base::CompareCase::INSENSITIVE_ASCII));

781 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"),

782 base::CompareCase::SENSITIVE));

// Both input and search string are empty.
783 EXPECT_TRUE(

784 EndsWith(string16(), string16(), base::CompareCase::INSENSITIVE_ASCII));

785 EXPECT_TRUE(EndsWith(string16(), string16(), base::CompareCase::SENSITIVE));

786 }

787

// Checks the offsets that ReplaceStringPlaceholders() writes through its
// third argument when two single-character substitutions are applied.
788 TEST(StringUtilTest, GetStringFWithOffsets) {

789 std::vector<string16> subst;

790 subst.push_back(ASCIIToUTF16("1"));

791 subst.push_back(ASCIIToUTF16("2"));

792 std::vector<size_t> offsets;

793

// Placeholders appear in numeric order: $1 at index 7, $2 at index 25.
794 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),

795 subst,

796 &offsets);

797 EXPECT_EQ(2U, offsets.size());

798 EXPECT_EQ(7U, offsets[0]);

799 EXPECT_EQ(25U, offsets[1]);

800 offsets.clear();

801

// Placeholders appear in reverse order. The expectations below show
// offsets[0] still describing $1 (now at 25) and offsets[1] describing $2
// (now at 7), i.e. the entries follow placeholder number rather than
// position in the format string.
802 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),

803 subst,

804 &offsets);

805 EXPECT_EQ(2U, offsets.size());

806 EXPECT_EQ(25U, offsets[0]);

807 EXPECT_EQ(7U, offsets[1]);

808 offsets.clear();

809 }

810

811 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {

812 // Test whether ReplaceStringPlaceholders works as expected when there

813 // are fewer substitutions than placeholders.

814 std::vector<string16> subst;

815 subst.push_back(ASCIIToUTF16("9a"));

816 subst.push_back(ASCIIToUTF16("8b"));

817 subst.push_back(ASCIIToUTF16("7c"));

818

// Only $1..$3 have a substitution; the format string also uses $4..$6 and
// then repeats $1..$3.
819 string16 formatted =

820 ReplaceStringPlaceholders(

821 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);

822

// $4, $5 and $6 are expected to expand to nothing ("d,e,f"), while the
// repeated $1..$3 are substituted again.
823 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));

824 }

825

// Checks ReplaceStringPlaceholders() on a string16 format string that uses
// each of the placeholders $1..$9 exactly once, with nine substitutions.
826 TEST(StringUtilTest, ReplaceStringPlaceholders) {

827 std::vector<string16> subst;

828 subst.push_back(ASCIIToUTF16("9a"));

829 subst.push_back(ASCIIToUTF16("8b"));

830 subst.push_back(ASCIIToUTF16("7c"));

831 subst.push_back(ASCIIToUTF16("6d"));

832 subst.push_back(ASCIIToUTF16("5e"));

833 subst.push_back(ASCIIToUTF16("4f"));

834 subst.push_back(ASCIIToUTF16("3g"));

835 subst.push_back(ASCIIToUTF16("2h"));

836 subst.push_back(ASCIIToUTF16("1i"));

837

// NULL is passed for the offsets argument: no offsets are requested here.
838 string16 formatted =

839 ReplaceStringPlaceholders(

840 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);

841

// Each $N is replaced by subst[N - 1]; the letter following it is kept.
842 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));

843 }

844

// Checks ReplaceStringPlaceholders() with fourteen substitutions, so that
// the format string contains the two-digit placeholders $10..$14.
845 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {

846 std::vector<string16> subst;

847 subst.push_back(ASCIIToUTF16("9a"));

848 subst.push_back(ASCIIToUTF16("8b"));

849 subst.push_back(ASCIIToUTF16("7c"));

850 subst.push_back(ASCIIToUTF16("6d"));

851 subst.push_back(ASCIIToUTF16("5e"));

852 subst.push_back(ASCIIToUTF16("4f"));

853 subst.push_back(ASCIIToUTF16("3g"));

854 subst.push_back(ASCIIToUTF16("2h"));

855 subst.push_back(ASCIIToUTF16("1i"));

856 subst.push_back(ASCIIToUTF16("0j"));

857 subst.push_back(ASCIIToUTF16("-1k"));

858 subst.push_back(ASCIIToUTF16("-2l"));

859 subst.push_back(ASCIIToUTF16("-3m"));

860 subst.push_back(ASCIIToUTF16("-4n"));

861

// The format string ends with a bare "$1" as its final characters.
862 string16 formatted =

863 ReplaceStringPlaceholders(

864 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"

865 "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);

866

// $10..$14 are expected to select the tenth to fourteenth substitutions
// (not $1 followed by a literal digit), and the trailing $1 expands to "9a".
867 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"

868 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));

869 }

870

// Same scenario as the $1..$9 string16 case, but using std::string for the
// format string, the substitutions and the result.
871 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {

872 std::vector<std::string> subst;

873 subst.push_back("9a");

874 subst.push_back("8b");

875 subst.push_back("7c");

876 subst.push_back("6d");

877 subst.push_back("5e");

878 subst.push_back("4f");

879 subst.push_back("3g");

880 subst.push_back("2h");

881 subst.push_back("1i");

882

// NULL is passed for the offsets argument: no offsets are requested here.
883 std::string formatted =

884 ReplaceStringPlaceholders(

885 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);

886

887 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");

888 }

889

// Checks how ReplaceStringPlaceholders() treats runs of two or more '$'
// characters in front of a digit.
890 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {

891 std::vector<std::string> subst;

892 subst.push_back("a");

893 subst.push_back("b");

894 subst.push_back("c");

// Per the expected output, each run of N dollar signs comes out as N - 1
// literal dollar signs followed by the digit, and none of the substitutions
// ("a", "b", "c") appears in the result.
895 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),

896 "$1 $$2 $$$3");

897 }

898

// Exercises strlcpy() and wcslcpy() with the 7-character source "abcdefg"
// and destination buffers of various sizes. In every case the expected
// return value is 7, the length of the source, regardless of how much fit.
899 TEST(StringUtilTest, LcpyTest) {

900 // Test the normal case where we fit in our buffer.

901 {

902 char dst[10];

903 wchar_t wdst[10];

904 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));

// Comparing 8 elements includes the terminating null.
905 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));

906 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));

907 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));

908 }

909

910 // Test dst_size == 0, nothing should be written to |dst| and we should

911 // have the equivalent of strlen(src).

912 {

// The buffers are pre-filled with sentinel values so that any write is
// detectable.
913 char dst[2] = {1, 2};

914 wchar_t wdst[2] = {1, 2};

915 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", 0));

916 EXPECT_EQ(1, dst[0]);

917 EXPECT_EQ(2, dst[1]);

918 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", 0));

919 EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);

920 EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);

921 }

922

923 // Test the case where we _just_ completely fit including the null.

924 {

925 char dst[8];

926 wchar_t wdst[8];

927 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));

928 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));

929 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));

930 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));

931 }

932

933 // Test the case where we are one smaller, so we can't fit the null.

934 {

935 char dst[7];

936 wchar_t wdst[7];

937 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));

// Expected content is the first six characters plus a terminating null,
// i.e. the copy is truncated but still null-terminated.
938 EXPECT_EQ(0, memcmp(dst, "abcdef", 7));

939 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));

940 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));

941 }

942

943 // Test the case where we are just too small.

944 {

945 char dst[3];

946 wchar_t wdst[3];

947 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));

// Expected content is two characters plus a terminating null.
948 EXPECT_EQ(0, memcmp(dst, "ab", 3));

949 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));

950 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));

951 }

952 }

953

// Table-driven check of IsWprintfFormatPortable(): each wide format string
// is paired with the result the function is expected to return for it.
954 TEST(StringUtilTest, WprintfFormatPortabilityTest) {

955 static const struct {

956 const wchar_t* input;

957 bool portable;

958 } cases[] = {

// String conversions: only the explicit "%ls" form is expected to pass.
959 { L"%ls", true },

960 { L"%s", false },

961 { L"%S", false },

962 { L"%lS", false },

963 { L"Hello, %s", false },

// Character conversions: only the explicit "%lc" form is expected to pass.
964 { L"%lc", true },

965 { L"%c", false },

966 { L"%C", false },

967 { L"%lC", false },

// A single rejected specifier makes the whole format string fail.
968 { L"%ls %s", false },

969 { L"%s %ls", false },

970 { L"%s %ls %s", false },

// Numeric conversions: lowercase f/d/o/u are expected to pass, while the
// uppercase F/D/O/U variants are expected to fail.
971 { L"%f", true },

972 { L"%f %F", false },

973 { L"%d %D", false },

974 { L"%o %O", false },

975 { L"%u %U", false },

976 { L"%f %d %o %u", true },

// Specifiers carrying flags, width and precision.
977 { L"%-8d (%02.1f%)", true },

978 { L"% 10s", false },

979 { L"% 10ls", true }

980 };

981 for (size_t i = 0; i < arraysize(cases); ++i)

982 EXPECT_EQ(cases[i].portable, IsWprintfFormatPortable(cases[i].input));

983 }

984

// Checks RemoveChars() on std::string, including its boolean return value.
// Every call passes the same string object as both input and output.
985 TEST(StringUtilTest, RemoveChars) {

986 const char kRemoveChars[] = "-/+*";

987 std::string input = "A-+bc/d!*";

// Some characters match kRemoveChars: the call is expected to return true
// and leave only the non-matching characters.
988 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));

989 EXPECT_EQ("Abcd!", input);

990

991 // No characters match kRemoveChars.

992 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));

993 EXPECT_EQ("Abcd!", input);

994

995 // Empty string.

996 input.clear();

997 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));

998 EXPECT_EQ(std::string(), input);

999 }

1000

// Table-driven check of ReplaceChars(): each row gives the input, the set of
// characters to replace, the replacement text, the expected output and the
// expected boolean return value.
1001 TEST(StringUtilTest, ReplaceChars) {

1002 struct TestData {

1003 const char* input;

1004 const char* replace_chars;

1005 const char* replace_with;

1006 const char* output;

1007 bool result;

1008 } cases[] = {

// Rows where nothing in the input matches replace_chars: the output equals
// the input and the expected result is false.
1009 { "", "", "", "", false },

1010 { "test", "", "", "test", false },

1011 { "test", "", "!", "test", false },

1012 { "test", "z", "!", "test", false },

// Rows with at least one match: every matching character is replaced by
// the whole replace_with string, which may be longer than one character.
1013 { "test", "e", "!", "t!st", true },

1014 { "test", "e", "!?", "t!?st", true },

1015 { "test", "ez", "!", "t!st", true },

1016 { "test", "zed", "!?", "t!?st", true },

1017 { "test", "t", "!?", "!?es!?", true },

1018 { "test", "et", "!>", "!>!>s!>", true },

1019 { "test", "zest", "!", "!!!!", true },

1020 { "test", "szt", "!", "!e!!", true },

// The replacement text itself contains the replaced character.
1021 { "test", "t", "test", "testestest", true },

1022 };

1023

1024 for (size_t i = 0; i < arraysize(cases); ++i) {

// A fresh output string is used for every row.
1025 std::string output;

1026 bool result = ReplaceChars(cases[i].input,

1027 cases[i].replace_chars,

1028 cases[i].replace_with,

1029 &output);

1030 EXPECT_EQ(cases[i].result, result);

1031 EXPECT_EQ(cases[i].output, output);

1032 }

1033 }

1034

// Checks ContainsOnlyChars() for std::string and string16 inputs against
// explicit character lists and the kWhitespaceASCII / kWhitespaceUTF16 sets.
1035 TEST(StringUtilTest, ContainsOnlyChars) {

1036 // Providing an empty list of characters should return false but for the empty

1037 // string.

1038 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));

1039 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));

1040

// Explicit character list: the order of the allowed characters does not
// matter, and a single disallowed character makes the check fail.
1041 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));

1042 EXPECT_TRUE(ContainsOnlyChars("1", "1234"));

1043 EXPECT_TRUE(ContainsOnlyChars("1", "4321"));

1044 EXPECT_TRUE(ContainsOnlyChars("123", "4321"));

1045 EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));

1046

// 8-bit strings checked against kWhitespaceASCII.
1047 EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));

1048 EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));

1049 EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));

1050 EXPECT_TRUE(ContainsOnlyChars("\t \r \n ", kWhitespaceASCII));

1051 EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));

1052 EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII));

1053

// The same inputs as string16, checked against kWhitespaceUTF16.
1054 EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));

1055 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));

1056 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));

1057 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n "), kWhitespaceUTF16));

1058 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));

1059 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n "),

1060 kWhitespaceUTF16));

1061 }

1062

// Checks the three-way result of CompareCaseInsensitiveASCII(): the
// expectations below use exactly 0, -1 and 1.
1063 TEST(StringUtilTest, CompareCaseInsensitiveASCII) {

// Equal strings, including strings that differ only in letter case.
1064 EXPECT_EQ(0, CompareCaseInsensitiveASCII("", ""));

1065 EXPECT_EQ(0, CompareCaseInsensitiveASCII("Asdf", "aSDf"));

1066

1067 // Differing lengths.

// One string is a case-insensitive prefix of the other; the shorter one is
// expected to compare as smaller.
1068 EXPECT_EQ(-1, CompareCaseInsensitiveASCII("Asdf", "aSDfA"));

1069 EXPECT_EQ(1, CompareCaseInsensitiveASCII("AsdfA", "aSDf"));

1070

1071 // Differing values.

// Same length; the strings differ in the last character ('A' vs 'b'), which
// is expected to decide the ordering with 'A' comparing below 'b'.
1072 EXPECT_EQ(-1, CompareCaseInsensitiveASCII("AsdfA", "aSDfb"));

1073 EXPECT_EQ(1, CompareCaseInsensitiveASCII("Asdfb", "aSDfA"));

1074 }

1075

// Checks EqualsCaseInsensitiveASCII() for empty strings, strings differing
// only in case, strings differing in a letter, and strings of unequal length.
1076 TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {

1077 EXPECT_TRUE(EqualsCaseInsensitiveASCII("", ""));

1078 EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", "aSDF"));

// First character differs ('b' vs 'a').
1079 EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", "aSDF"));

// Second string has one extra trailing character.
1080 EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", "aSDFz"));

1081 }

1082

// Test fixture providing a helper that writes through the pointer returned
// by WriteInto() and then checks the resulting string's content and size.
1083 class WriteIntoTest : public testing::Test {

1084 protected:

// Requests num_chars + 1 from WriteInto() for an empty string, copies up to
// num_chars characters of "supercali" into the returned pointer, and
// verifies the outcome.
1085 static void WritesCorrectly(size_t num_chars) {

1086 std::string buffer;

1087 char kOriginal[] = "supercali";

1088 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);

1089 // Using std::string(buffer.c_str()) instead of |buffer| truncates the

1090 // string at the first \0.

// arraysize(kOriginal) - 1 is the length of kOriginal without its
// terminating null, so the expected text is kOriginal cut to num_chars
// characters, or all of kOriginal when num_chars is larger.
1091 EXPECT_EQ(std::string(kOriginal,

1092 std::min(num_chars, arraysize(kOriginal) - 1)),

1093 std::string(buffer.c_str()));

// The string's size is expected to be num_chars, one less than the value
// passed to WriteInto().
1094 EXPECT_EQ(num_chars, buffer.size());

1095 }

1096 };

1097

// Runs the fixture's WritesCorrectly() helper for several sizes, then checks
// that writing through WriteInto() into a copy of a string leaves the
// string it was copied from unchanged.
1098 TEST_F(WriteIntoTest, WriteInto) {

1099 // Validate that WriteInto reserves enough space and

1100 // sizes a string correctly.

// 5000 is far longer than the helper's 9-character source text.
1101 WritesCorrectly(1);

1102 WritesCorrectly(2);

1103 WritesCorrectly(5000);

1104

1105 // Validate that WriteInto doesn't modify other strings

1106 // when using a Copy-on-Write implementation.

1107 const char kLive[] = "live";

1108 const char kDead[] = "dead";

1109 const std::string live = kLive;

// |dead| starts out as a copy of |live| and is then overwritten in place.
1110 std::string dead = live;

1111 strncpy(WriteInto(&dead, 5), kDead, 4);

1112 EXPECT_EQ(kDead, dead);

1113 EXPECT_EQ(4u, dead.size());

// |live| must still hold its original content and size.
1114 EXPECT_EQ(kLive, live);

1115 EXPECT_EQ(4u, live.size());

1116 }

1117

1118 } // namespace base

OLD	NEW

« no previous file with comments | « base/strings/string_util_posix.h ('k') | base/strings/string_util_win.h » ('j') | no next file with comments »