base/string_util_static.cc - Issue 6877053: Base: More adjustments to BASE_API and project dependencies to

Side by Side Diff: base/string_util_static.cc

Issue 6877053: Base: More adjustments to BASE_API and project dependencies to (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 9 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "base/string_util.h"	5 #include "base/string_util_static.h"

6

7 #include "build/build_config.h"

8

9 #include <ctype.h>

10 #include <errno.h>

11 #include <math.h>

12 #include <stdarg.h>

13 #include <stdio.h>

14 #include <stdlib.h>

15 #include <string.h>

16 #include <time.h>

17 #include <wchar.h>

18 #include <wctype.h>

19

20 #include <algorithm>

21 #include <vector>

22

23 #include "base/basictypes.h"

24 #include "base/logging.h"

25 #include "base/memory/singleton.h"

26 #include "base/third_party/dmg_fp/dmg_fp.h"

27 #include "base/utf_string_conversion_utils.h"

28 #include "base/utf_string_conversions.h"

29 #include "base/third_party/icu/icu_utf.h"

30

31 namespace {

32

33 // Force the singleton used by Empty[W]String[16] to be a unique type. This

34 // prevents other code that might accidentally use Singleton<string> from

35 // getting our internal one.

36 struct EmptyStrings {

37 EmptyStrings() {}

38 const std::string s;

39 const std::wstring ws;

40 const string16 s16;

41

42 static EmptyStrings* GetInstance() {

43 return Singleton<EmptyStrings>::get();

44 }

45 };

46

47 // Used by ReplaceStringPlaceholders to track the position in the string of

48 // replaced parameters.

49 struct ReplacementOffset {

50 ReplacementOffset(uintptr_t parameter, size_t offset)

51 : parameter(parameter),

52 offset(offset) {}

53

54 // Index of the parameter.

55 uintptr_t parameter;

56

57 // Starting position in the string.

58 size_t offset;

59 };

60

61 static bool CompareParameter(const ReplacementOffset& elem1,

62 const ReplacementOffset& elem2) {

63 return elem1.parameter < elem2.parameter;

64 }

65

66 } // namespace

67

68 namespace base {

69

70 bool IsWprintfFormatPortable(const wchar_t* format) {

71 for (const wchar_t* position = format; *position != '\0'; ++position) {

72 if (*position == '%') {

73 bool in_specification = true;

74 bool modifier_l = false;

75 while (in_specification) {

76 // Eat up characters until reaching a known specifier.

77 if (*++position == '\0') {

78 // The format string ended in the middle of a specification. Call

79 // it portable because no unportable specifications were found. The

80 // string is equally broken on all platforms.

81 return true;

82 }

83

84 if (*position == 'l') {

85 // 'l' is the only thing that can save the 's' and 'c' specifiers.

86 modifier_l = true;

87 } else if (((*position == 's' || *position == 'c') && !modifier_l) ||

88 *position == 'S' || *position == 'C' || *position == 'F' ||

89 *position == 'D' || *position == 'O' || *position == 'U') {

90 // Not portable.

91 return false;

92 }

93

94 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {

95 // Portable, keep scanning the rest of the format string.

96 in_specification = false;

97 }

98 }

99 }

100 }

101

102 return true;

103 }

104

105 } // namespace base

106

107

108 const std::string& EmptyString() {

109 return EmptyStrings::GetInstance()->s;

110 }

111

112 const std::wstring& EmptyWString() {

113 return EmptyStrings::GetInstance()->ws;

114 }

115

116 const string16& EmptyString16() {

117 return EmptyStrings::GetInstance()->s16;

118 }

119	6

120 #define WHITESPACE_UNICODE \	7 #define WHITESPACE_UNICODE \

121 0x0009, /* <control-0009> to <control-000D> */ \	8 0x0009, /* <control-0009> to <control-000D> */ \

122 0x000A, \	9 0x000A, \

123 0x000B, \	10 0x000B, \

124 0x000C, \	11 0x000C, \

125 0x000D, \	12 0x000D, \

126 0x0020, /* Space */ \	13 0x0020, /* Space */ \

127 0x0085, /* <control-0085> */ \	14 0x0085, /* <control-0085> */ \

128 0x00A0, /* No-Break Space */ \	15 0x00A0, /* No-Break Space */ \

(...skipping 28 matching lines...) Expand all Loading...
157 0x09, // <control-0009> to <control-000D>	44 0x09, // <control-0009> to <control-000D>

158 0x0A,	45 0x0A,

159 0x0B,	46 0x0B,

160 0x0C,	47 0x0C,

161 0x0D,	48 0x0D,

162 0x20, // Space	49 0x20, // Space

163 0	50 0

164 };	51 };

165	52

166 const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";	53 const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";

167

168 template<typename STR>

169 bool RemoveCharsT(const STR& input,

170 const typename STR::value_type remove_chars[],

171 STR* output) {

172 bool removed = false;

173 size_t found;

174

175 *output = input;

176

177 found = output->find_first_of(remove_chars);

178 while (found != STR::npos) {

179 removed = true;

180 output->replace(found, 1, STR());

181 found = output->find_first_of(remove_chars, found);

182 }

183

184 return removed;

185 }

186

187 bool RemoveChars(const std::wstring& input,

188 const wchar_t remove_chars[],

189 std::wstring* output) {

190 return RemoveCharsT(input, remove_chars, output);

191 }

192

193 #if !defined(WCHAR_T_IS_UTF16)

194 bool RemoveChars(const string16& input,

195 const char16 remove_chars[],

196 string16* output) {

197 return RemoveCharsT(input, remove_chars, output);

198 }

199 #endif

200

201 bool RemoveChars(const std::string& input,

202 const char remove_chars[],

203 std::string* output) {

204 return RemoveCharsT(input, remove_chars, output);

205 }

206

207 template<typename STR>

208 TrimPositions TrimStringT(const STR& input,

209 const typename STR::value_type trim_chars[],

210 TrimPositions positions,

211 STR* output) {

212 // Find the edges of leading/trailing whitespace as desired.

213 const typename STR::size_type last_char = input.length() - 1;

214 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?

215 input.find_first_not_of(trim_chars) : 0;

216 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?

217 input.find_last_not_of(trim_chars) : last_char;

218

219 // When the string was all whitespace, report that we stripped off whitespace

220 // from whichever position the caller was interested in. For empty input, we

221 // stripped no whitespace, but we still need to clear |output|.

222 if (input.empty() ||

223 (first_good_char == STR::npos) || (last_good_char == STR::npos)) {

224 bool input_was_empty = input.empty(); // in case output == &input

225 output->clear();

226 return input_was_empty ? TRIM_NONE : positions;

227 }

228

229 // Trim the whitespace.

230 *output =

231 input.substr(first_good_char, last_good_char - first_good_char + 1);

232

233 // Return where we trimmed from.

234 return static_cast<TrimPositions>(

235 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |

236 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));

237 }

238

239 bool TrimString(const std::wstring& input,

240 const wchar_t trim_chars[],

241 std::wstring* output) {

242 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;

243 }

244

245 #if !defined(WCHAR_T_IS_UTF16)

246 bool TrimString(const string16& input,

247 const char16 trim_chars[],

248 string16* output) {

249 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;

250 }

251 #endif

252

253 bool TrimString(const std::string& input,

254 const char trim_chars[],

255 std::string* output) {

256 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;

257 }

258

259 void TruncateUTF8ToByteSize(const std::string& input,

260 const size_t byte_size,

261 std::string* output) {

262 DCHECK(output);

263 if (byte_size > input.length()) {

264 *output = input;

265 return;

266 }

267 DCHECK_LE(byte_size, static_cast<uint32>(kint32max));

268 // Note: This cast is necessary because CBU8_NEXT uses int32s.

269 int32 truncation_length = static_cast<int32>(byte_size);

270 int32 char_index = truncation_length - 1;

271 const char* data = input.data();

272

273 // Using CBU8, we will move backwards from the truncation point

274 // to the beginning of the string looking for a valid UTF8

275 // character. Once a full UTF8 character is found, we will

276 // truncate the string to the end of that character.

277 while (char_index >= 0) {

278 int32 prev = char_index;

279 uint32 code_point = 0;

280 CBU8_NEXT(data, char_index, truncation_length, code_point);

281 if (!base::IsValidCharacter(code_point) ||

282 !base::IsValidCodepoint(code_point)) {

283 char_index = prev - 1;

284 } else {

285 break;

286 }

287 }

288

289 if (char_index >= 0 )

290 *output = input.substr(0, char_index);

291 else

292 output->clear();

293 }

294

295 TrimPositions TrimWhitespace(const std::wstring& input,

296 TrimPositions positions,

297 std::wstring* output) {

298 return TrimStringT(input, kWhitespaceWide, positions, output);

299 }

300

301 #if !defined(WCHAR_T_IS_UTF16)

302 TrimPositions TrimWhitespace(const string16& input,

303 TrimPositions positions,

304 string16* output) {

305 return TrimStringT(input, kWhitespaceUTF16, positions, output);

306 }

307 #endif

308

309 TrimPositions TrimWhitespaceASCII(const std::string& input,

310 TrimPositions positions,

311 std::string* output) {

312 return TrimStringT(input, kWhitespaceASCII, positions, output);

313 }

314

315 // This function is only for backward-compatibility.

316 // To be removed when all callers are updated.

317 TrimPositions TrimWhitespace(const std::string& input,

318 TrimPositions positions,

319 std::string* output) {

320 return TrimWhitespaceASCII(input, positions, output);

321 }

322

323 template<typename STR>

324 STR CollapseWhitespaceT(const STR& text,

325 bool trim_sequences_with_line_breaks) {

326 STR result;

327 result.resize(text.size());

328

329 // Set flags to pretend we're already in a trimmed whitespace sequence, so we

330 // will trim any leading whitespace.

331 bool in_whitespace = true;

332 bool already_trimmed = true;

333

334 int chars_written = 0;

335 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {

336 if (IsWhitespace(*i)) {

337 if (!in_whitespace) {

338 // Reduce all whitespace sequences to a single space.

339 in_whitespace = true;

340 result[chars_written++] = L' ';

341 }

342 if (trim_sequences_with_line_breaks && !already_trimmed &&

343 ((*i == '\n') || (*i == '\r'))) {

344 // Whitespace sequences containing CR or LF are eliminated entirely.

345 already_trimmed = true;

346 --chars_written;

347 }

348 } else {

349 // Non-whitespace chracters are copied straight across.

350 in_whitespace = false;

351 already_trimmed = false;

352 result[chars_written++] = *i;

353 }

354 }

355

356 if (in_whitespace && !already_trimmed) {

357 // Any trailing whitespace is eliminated.

358 --chars_written;

359 }

360

361 result.resize(chars_written);

362 return result;

363 }

364

365 std::wstring CollapseWhitespace(const std::wstring& text,

366 bool trim_sequences_with_line_breaks) {

367 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);

368 }

369

370 #if !defined(WCHAR_T_IS_UTF16)

371 string16 CollapseWhitespace(const string16& text,

372 bool trim_sequences_with_line_breaks) {

373 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);

374 }

375 #endif

376

377 std::string CollapseWhitespaceASCII(const std::string& text,

378 bool trim_sequences_with_line_breaks) {

379 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);

380 }

381

382 bool ContainsOnlyWhitespaceASCII(const std::string& str) {

383 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {

384 if (!IsAsciiWhitespace(*i))

385 return false;

386 }

387 return true;

388 }

389

390 bool ContainsOnlyWhitespace(const string16& str) {

391 for (string16::const_iterator i(str.begin()); i != str.end(); ++i) {

392 if (!IsWhitespace(*i))

393 return false;

394 }

395 return true;

396 }

397

398 template<typename STR>

399 static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {

400 for (typename STR::const_iterator iter = input.begin();

401 iter != input.end(); ++iter) {

402 if (characters.find(*iter) == STR::npos)

403 return false;

404 }

405 return true;

406 }

407

408 bool ContainsOnlyChars(const std::wstring& input,

409 const std::wstring& characters) {

410 return ContainsOnlyCharsT(input, characters);

411 }

412

413 #if !defined(WCHAR_T_IS_UTF16)

414 bool ContainsOnlyChars(const string16& input, const string16& characters) {

415 return ContainsOnlyCharsT(input, characters);

416 }

417 #endif

418

419 bool ContainsOnlyChars(const std::string& input,

420 const std::string& characters) {

421 return ContainsOnlyCharsT(input, characters);

422 }

423

424 std::string WideToASCII(const std::wstring& wide) {

425 DCHECK(IsStringASCII(wide)) << wide;

426 return std::string(wide.begin(), wide.end());

427 }

428

429 std::string UTF16ToASCII(const string16& utf16) {

430 DCHECK(IsStringASCII(utf16)) << utf16;

431 return std::string(utf16.begin(), utf16.end());

432 }

433

434 // Latin1 is just the low range of Unicode, so we can copy directly to convert.

435 bool WideToLatin1(const std::wstring& wide, std::string* latin1) {

436 std::string output;

437 output.resize(wide.size());

438 latin1->clear();

439 for (size_t i = 0; i < wide.size(); i++) {

440 if (wide[i] > 255)

441 return false;

442 output[i] = static_cast<char>(wide[i]);

443 }

444 latin1->swap(output);

445 return true;

446 }

447

448 template<class STR>

449 static bool DoIsStringASCII(const STR& str) {

450 for (size_t i = 0; i < str.length(); i++) {

451 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];

452 if (c > 0x7F)

453 return false;

454 }

455 return true;

456 }

457

458 bool IsStringASCII(const std::wstring& str) {

459 return DoIsStringASCII(str);

460 }

461

462 #if !defined(WCHAR_T_IS_UTF16)

463 bool IsStringASCII(const string16& str) {

464 return DoIsStringASCII(str);

465 }

466 #endif

467

468 bool IsStringASCII(const base::StringPiece& str) {

469 return DoIsStringASCII(str);

470 }

471

472 bool IsStringUTF8(const std::string& str) {

473 const char *src = str.data();

474 int32 src_len = static_cast<int32>(str.length());

475 int32 char_index = 0;

476

477 while (char_index < src_len) {

478 int32 code_point;

479 CBU8_NEXT(src, char_index, src_len, code_point);

480 if (!base::IsValidCharacter(code_point))

481 return false;

482 }

483 return true;

484 }

485

486 template<typename Iter>

487 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,

488 Iter a_end,

489 const char* b) {

490 for (Iter it = a_begin; it != a_end; ++it, ++b) {

491 if (!*b || base::ToLowerASCII(*it) != *b)

492 return false;

493 }

494 return *b == 0;

495 }

496

497 // Front-ends for LowerCaseEqualsASCII.

498 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {

499 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);

500 }

501

502 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {

503 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);

504 }

505

506 #if !defined(WCHAR_T_IS_UTF16)

507 bool LowerCaseEqualsASCII(const string16& a, const char* b) {

508 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);

509 }

510 #endif

511

512 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,

513 std::string::const_iterator a_end,

514 const char* b) {

515 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

516 }

517

518 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,

519 std::wstring::const_iterator a_end,

520 const char* b) {

521 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

522 }

523

524 #if !defined(WCHAR_T_IS_UTF16)

525 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,

526 string16::const_iterator a_end,

527 const char* b) {

528 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

529 }

530 #endif

531

532 bool LowerCaseEqualsASCII(const char* a_begin,

533 const char* a_end,

534 const char* b) {

535 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

536 }

537

538 bool LowerCaseEqualsASCII(const wchar_t* a_begin,

539 const wchar_t* a_end,

540 const char* b) {

541 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

542 }

543

544 #if !defined(WCHAR_T_IS_UTF16)

545 bool LowerCaseEqualsASCII(const char16* a_begin,

546 const char16* a_end,

547 const char* b) {

548 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

549 }

550 #endif

551

552 bool EqualsASCII(const string16& a, const base::StringPiece& b) {

553 if (a.length() != b.length())

554 return false;

555 return std::equal(b.begin(), b.end(), a.begin());

556 }

557

558 bool StartsWithASCII(const std::string& str,

559 const std::string& search,

560 bool case_sensitive) {

561 if (case_sensitive)

562 return str.compare(0, search.length(), search) == 0;

563 else

564 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;

565 }

566

567 template <typename STR>

568 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {

569 if (case_sensitive) {

570 return str.compare(0, search.length(), search) == 0;

571 } else {

572 if (search.size() > str.size())

573 return false;

574 return std::equal(search.begin(), search.end(), str.begin(),

575 base::CaseInsensitiveCompare<typename STR::value_type>());

576 }

577 }

578

579 bool StartsWith(const std::wstring& str, const std::wstring& search,

580 bool case_sensitive) {

581 return StartsWithT(str, search, case_sensitive);

582 }

583

584 #if !defined(WCHAR_T_IS_UTF16)

585 bool StartsWith(const string16& str, const string16& search,

586 bool case_sensitive) {

587 return StartsWithT(str, search, case_sensitive);

588 }

589 #endif

590

591 template <typename STR>

592 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {

593 typename STR::size_type str_length = str.length();

594 typename STR::size_type search_length = search.length();

595 if (search_length > str_length)

596 return false;

597 if (case_sensitive) {

598 return str.compare(str_length - search_length, search_length, search) == 0;

599 } else {

600 return std::equal(search.begin(), search.end(),

601 str.begin() + (str_length - search_length),

602 base::CaseInsensitiveCompare<typename STR::value_type>());

603 }

604 }

605

606 bool EndsWith(const std::string& str, const std::string& search,

607 bool case_sensitive) {

608 return EndsWithT(str, search, case_sensitive);

609 }

610

611 bool EndsWith(const std::wstring& str, const std::wstring& search,

612 bool case_sensitive) {

613 return EndsWithT(str, search, case_sensitive);

614 }

615

616 #if !defined(WCHAR_T_IS_UTF16)

617 bool EndsWith(const string16& str, const string16& search,

618 bool case_sensitive) {

619 return EndsWithT(str, search, case_sensitive);

620 }

621 #endif

622

623 DataUnits GetByteDisplayUnits(int64 bytes) {

624 // The byte thresholds at which we display amounts. A byte count is displayed

625 // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].

626 // This must match the DataUnits enum.

627 static const int64 kUnitThresholds[] = {

628 0, // DATA_UNITS_BYTE,

629 3*1024, // DATA_UNITS_KIBIBYTE,

630 2*1024*1024, // DATA_UNITS_MEBIBYTE,

631 1024*1024*1024 // DATA_UNITS_GIBIBYTE,

632 };

633

634 if (bytes < 0) {

635 NOTREACHED() << "Negative bytes value";

636 return DATA_UNITS_BYTE;

637 }

638

639 int unit_index = arraysize(kUnitThresholds);

640 while (--unit_index > 0) {

641 if (bytes >= kUnitThresholds[unit_index])

642 break;

643 }

644

645 DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIBIBYTE);

646 return DataUnits(unit_index);

647 }

648

649 // TODO(mpcomplete): deal with locale

650 // Byte suffixes. This must match the DataUnits enum.

651 static const char* const kByteStrings[] = {

652 "B",

653 "kB",

654 "MB",

655 "GB"

656 };

657

658 static const char* const kSpeedStrings[] = {

659 "B/s",

660 "kB/s",

661 "MB/s",

662 "GB/s"

663 };

664

665 string16 FormatBytesInternal(int64 bytes,

666 DataUnits units,

667 bool show_units,

668 const char* const* suffix) {

669 if (bytes < 0) {

670 NOTREACHED() << "Negative bytes value";

671 return string16();

672 }

673

674 DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIBIBYTE);

675

676 // Put the quantity in the right units.

677 double unit_amount = static_cast<double>(bytes);

678 for (int i = 0; i < units; ++i)

679 unit_amount /= 1024.0;

680

681 char buf[64];

682 if (bytes != 0 && units != DATA_UNITS_BYTE && unit_amount < 100)

683 base::snprintf(buf, arraysize(buf), "%.1lf", unit_amount);

684 else

685 base::snprintf(buf, arraysize(buf), "%.0lf", unit_amount);

686

687 std::string ret(buf);

688 if (show_units) {

689 ret += " ";

690 ret += suffix[units];

691 }

692

693 return ASCIIToUTF16(ret);

694 }

695

696 string16 FormatBytes(int64 bytes, DataUnits units, bool show_units) {

697 return FormatBytesInternal(bytes, units, show_units, kByteStrings);

698 }

699

700 string16 FormatSpeed(int64 bytes, DataUnits units, bool show_units) {

701 return FormatBytesInternal(bytes, units, show_units, kSpeedStrings);

702 }

703

704 template<class StringType>

705 void DoReplaceSubstringsAfterOffset(StringType* str,

706 typename StringType::size_type start_offset,

707 const StringType& find_this,

708 const StringType& replace_with,

709 bool replace_all) {

710 if ((start_offset == StringType::npos) || (start_offset >= str->length()))

711 return;

712

713 DCHECK(!find_this.empty());

714 for (typename StringType::size_type offs(str->find(find_this, start_offset));

715 offs != StringType::npos; offs = str->find(find_this, offs)) {

716 str->replace(offs, find_this.length(), replace_with);

717 offs += replace_with.length();

718

719 if (!replace_all)

720 break;

721 }

722 }

723

724 void ReplaceFirstSubstringAfterOffset(string16* str,

725 string16::size_type start_offset,

726 const string16& find_this,

727 const string16& replace_with) {

728 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,

729 false); // replace first instance

730 }

731

732 void ReplaceFirstSubstringAfterOffset(std::string* str,

733 std::string::size_type start_offset,

734 const std::string& find_this,

735 const std::string& replace_with) {

736 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,

737 false); // replace first instance

738 }

739

740 void ReplaceSubstringsAfterOffset(string16* str,

741 string16::size_type start_offset,

742 const string16& find_this,

743 const string16& replace_with) {

744 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,

745 true); // replace all instances

746 }

747

748 void ReplaceSubstringsAfterOffset(std::string* str,

749 std::string::size_type start_offset,

750 const std::string& find_this,

751 const std::string& replace_with) {

752 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,

753 true); // replace all instances

754 }

755

756

757 template<typename STR>

758 static size_t TokenizeT(const STR& str,

759 const STR& delimiters,

760 std::vector<STR>* tokens) {

761 tokens->clear();

762

763 typename STR::size_type start = str.find_first_not_of(delimiters);

764 while (start != STR::npos) {

765 typename STR::size_type end = str.find_first_of(delimiters, start + 1);

766 if (end == STR::npos) {

767 tokens->push_back(str.substr(start));

768 break;

769 } else {

770 tokens->push_back(str.substr(start, end - start));

771 start = str.find_first_not_of(delimiters, end + 1);

772 }

773 }

774

775 return tokens->size();

776 }

777

778 size_t Tokenize(const std::wstring& str,

779 const std::wstring& delimiters,

780 std::vector<std::wstring>* tokens) {

781 return TokenizeT(str, delimiters, tokens);

782 }

783

784 #if !defined(WCHAR_T_IS_UTF16)

785 size_t Tokenize(const string16& str,

786 const string16& delimiters,

787 std::vector<string16>* tokens) {

788 return TokenizeT(str, delimiters, tokens);

789 }

790 #endif

791

792 size_t Tokenize(const std::string& str,

793 const std::string& delimiters,

794 std::vector<std::string>* tokens) {

795 return TokenizeT(str, delimiters, tokens);

796 }

797

798 size_t Tokenize(const base::StringPiece& str,

799 const base::StringPiece& delimiters,

800 std::vector<base::StringPiece>* tokens) {

801 return TokenizeT(str, delimiters, tokens);

802 }

803

804 template<typename STR>

805 static STR JoinStringT(const std::vector<STR>& parts,

806 typename STR::value_type sep) {

807 if (parts.empty())

808 return STR();

809

810 STR result(parts[0]);

811 typename std::vector<STR>::const_iterator iter = parts.begin();

812 ++iter;

813

814 for (; iter != parts.end(); ++iter) {

815 result += sep;

816 result += *iter;

817 }

818

819 return result;

820 }

821

822 std::string JoinString(const std::vector<std::string>& parts, char sep) {

823 return JoinStringT(parts, sep);

824 }

825

826 string16 JoinString(const std::vector<string16>& parts, char16 sep) {

827 return JoinStringT(parts, sep);

828 }

829

830 template<class FormatStringType, class OutStringType>

831 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,

832 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {

833 size_t substitutions = subst.size();

834 DCHECK(substitutions < 10);

835

836 size_t sub_length = 0;

837 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();

838 iter != subst.end(); ++iter) {

839 sub_length += iter->length();

840 }

841

842 OutStringType formatted;

843 formatted.reserve(format_string.length() + sub_length);

844

845 std::vector<ReplacementOffset> r_offsets;

846 for (typename FormatStringType::const_iterator i = format_string.begin();

847 i != format_string.end(); ++i) {

848 if ('$' == *i) {

849 if (i + 1 != format_string.end()) {

850 ++i;

851 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;

852 if ('$' == *i) {

853 while (i != format_string.end() && '$' == *i) {

854 formatted.push_back('$');

855 ++i;

856 }

857 --i;

858 } else {

859 uintptr_t index = *i - '1';

860 if (offsets) {

861 ReplacementOffset r_offset(index,

862 static_cast<int>(formatted.size()));

863 r_offsets.insert(std::lower_bound(r_offsets.begin(),

864 r_offsets.end(),

865 r_offset,

866 &CompareParameter),

867 r_offset);

868 }

869 if (index < substitutions)

870 formatted.append(subst.at(index));

871 }

872 }

873 } else {

874 formatted.push_back(*i);

875 }

876 }

877 if (offsets) {

878 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();

879 i != r_offsets.end(); ++i) {

880 offsets->push_back(i->offset);

881 }

882 }

883 return formatted;

884 }

885

886 string16 ReplaceStringPlaceholders(const string16& format_string,

887 const std::vector<string16>& subst,

888 std::vector<size_t>* offsets) {

889 return DoReplaceStringPlaceholders(format_string, subst, offsets);

890 }

891

892 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,

893 const std::vector<std::string>& subst,

894 std::vector<size_t>* offsets) {

895 return DoReplaceStringPlaceholders(format_string, subst, offsets);

896 }

897

898 string16 ReplaceStringPlaceholders(const string16& format_string,

899 const string16& a,

900 size_t* offset) {

901 std::vector<size_t> offsets;

902 std::vector<string16> subst;

903 subst.push_back(a);

904 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);

905

906 DCHECK(offsets.size() == 1);

907 if (offset) {

908 *offset = offsets[0];

909 }

910 return result;

911 }

912

913 static bool IsWildcard(base_icu::UChar32 character) {

914 return character == '*' || character == '?';

915 }

916

917 // Move the strings pointers to the point where they start to differ.

918 template <typename CHAR, typename NEXT>

919 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,

920 const CHAR** string, const CHAR* string_end,

921 NEXT next) {

922 const CHAR* escape = NULL;

923 while (*pattern != pattern_end && *string != string_end) {

924 if (!escape && IsWildcard(**pattern)) {

925 // We don't want to match wildcard here, except if it's escaped.

926 return;

927 }

928

929 // Check if the escapement char is found. If so, skip it and move to the

930 // next character.

931 if (!escape && **pattern == '\\') {

932 escape = *pattern;

933 next(pattern, pattern_end);

934 continue;

935 }

936

937 // Check if the chars match, if so, increment the ptrs.

938 const CHAR* pattern_next = *pattern;

939 const CHAR* string_next = *string;

940 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);

941 if (pattern_char == next(&string_next, string_end) &&

942 pattern_char != (base_icu::UChar32) CBU_SENTINEL) {

943 *pattern = pattern_next;

944 *string = string_next;

945 } else {

946 // Uh ho, it did not match, we are done. If the last char was an

947 // escapement, that means that it was an error to advance the ptr here,

948 // let's put it back where it was. This also mean that the MatchPattern

949 // function will return false because if we can't match an escape char

950 // here, then no one will.

951 if (escape) {

952 *pattern = escape;

953 }

954 return;

955 }

956

957 escape = NULL;

958 }

959 }

960

961 template <typename CHAR, typename NEXT>

962 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {

963 while (*pattern != end) {

964 if (!IsWildcard(**pattern))

965 return;

966 next(pattern, end);

967 }

968 }

969

970 template <typename CHAR, typename NEXT>

971 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,

972 const CHAR* pattern, const CHAR* pattern_end,

973 int depth,

974 NEXT next) {

975 const int kMaxDepth = 16;

976 if (depth > kMaxDepth)

977 return false;

978

979 // Eat all the matching chars.

980 EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

981

982 // If the string is empty, then the pattern must be empty too, or contains

983 // only wildcards.

984 if (eval == eval_end) {

985 EatWildcard(&pattern, pattern_end, next);

986 return pattern == pattern_end;

987 }

988

989 // Pattern is empty but not string, this is not a match.

990 if (pattern == pattern_end)

991 return false;

992

993 // If this is a question mark, then we need to compare the rest with

994 // the current string or the string with one character eaten.

995 const CHAR* next_pattern = pattern;

996 next(&next_pattern, pattern_end);

997 if (pattern[0] == '?') {

998 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,

999 depth + 1, next))

1000 return true;

1001 const CHAR* next_eval = eval;

1002 next(&next_eval, eval_end);

1003 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,

1004 depth + 1, next))

1005 return true;

1006 }

1007

1008 // This is a *, try to match all the possible substrings with the remainder

1009 // of the pattern.

1010 if (pattern[0] == '*') {

1011 // Collapse duplicate wild cards (********** into *) so that the

1012 // method does not recurse unnecessarily. http://crbug.com/52839

1013 EatWildcard(&next_pattern, pattern_end, next);

1014

1015 while (eval != eval_end) {

1016 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,

1017 depth + 1, next))

1018 return true;

1019 eval++;

1020 }

1021

1022 // We reached the end of the string, let see if the pattern contains only

1023 // wildcards.

1024 if (eval == eval_end) {

1025 EatWildcard(&pattern, pattern_end, next);

1026 if (pattern != pattern_end)

1027 return false;

1028 return true;

1029 }

1030 }

1031

1032 return false;

1033 }

1034

1035 struct NextCharUTF8 {

1036 base_icu::UChar32 operator()(const char** p, const char* end) {

1037 base_icu::UChar32 c;

1038 int offset = 0;

1039 CBU8_NEXT(*p, offset, end - *p, c);

1040 *p += offset;

1041 return c;

1042 }

1043 };

1044

1045 struct NextCharUTF16 {

1046 base_icu::UChar32 operator()(const char16** p, const char16* end) {

1047 base_icu::UChar32 c;

1048 int offset = 0;

1049 CBU16_NEXT(*p, offset, end - *p, c);

1050 *p += offset;

1051 return c;

1052 }

1053 };

1054

1055 bool MatchPattern(const base::StringPiece& eval,

1056 const base::StringPiece& pattern) {

1057 return MatchPatternT(eval.data(), eval.data() + eval.size(),

1058 pattern.data(), pattern.data() + pattern.size(),

1059 0, NextCharUTF8());

1060 }

1061

1062 bool MatchPattern(const string16& eval, const string16& pattern) {

1063 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),

1064 pattern.c_str(), pattern.c_str() + pattern.size(),

1065 0, NextCharUTF16());

1066 }

1067

1068 // The following code is compatible with the OpenBSD lcpy interface. See:

1069 // http://www.gratisoft.us/todd/papers/strlcpy.html

1070 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c

1071

1072 namespace {

1073

1074 template <typename CHAR>

1075 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {

1076 for (size_t i = 0; i < dst_size; ++i) {

1077 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL.

1078 return i;

1079 }

1080

1081 // We were left off at dst_size. We over copied 1 byte. Null terminate.

1082 if (dst_size != 0)

1083 dst[dst_size - 1] = 0;

1084

1085 // Count the rest of the |src|, and return it's length in characters.

1086 while (src[dst_size]) ++dst_size;

1087 return dst_size;

1088 }

1089

1090 } // namespace

1091

1092 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {

1093 return lcpyT<char>(dst, src, dst_size);

1094 }

1095 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {

1096 return lcpyT<wchar_t>(dst, src, dst_size);

1097 }

OLD	NEW

« base/string_util_static.h ('K') | « base/string_util_static.h ('k') | base/tracked_objects.h » ('j') | no next file with comments »