third_party/WebKit/Source/wtf/text/WTFString.h - Issue 2764243002: Move files in wtf/ to platform/wtf/ (Part 9).

Side by Side Diff: third_party/WebKit/Source/wtf/text/WTFString.h

Issue 2764243002: Move files in wtf/ to platform/wtf/ (Part 9). (Closed)

Patch Set: Rebase. Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 // Copyright 2017 The Chromium Authors. All rights reserved.

2 * (C) 1999 Lars Knoll (knoll@kde.org)	2 // Use of this source code is governed by a BSD-style license that can be

3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012, 2013 Apple Inc.	3 // found in the LICENSE file.

4 * All rights reserved.

5 *

6 * This library is free software; you can redistribute it and/or

7 * modify it under the terms of the GNU Library General Public

8 * License as published by the Free Software Foundation; either

9 * version 2 of the License, or (at your option) any later version.

10 *

11 * This library is distributed in the hope that it will be useful,

12 * but WITHOUT ANY WARRANTY; without even the implied warranty of

13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

14 * Library General Public License for more details.

15 *

16 * You should have received a copy of the GNU Library General Public License

17 * along with this library; see the file COPYING.LIB. If not, write to

18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,

19 * Boston, MA 02110-1301, USA.

20 *

21 */

22	4

23 #ifndef WTFString_h	5 #include "platform/wtf/text/WTFString.h"

24 #define WTFString_h

25	6

26 // This file would be called String.h, but that conflicts with <string.h>	7 // The contents of this header were moved to platform/wtf as part of

27 // on systems without case-sensitive file systems.	8 // WTF migration project. See the following post for details:

28	9 // https://groups.google.com/a/chromium.org/d/msg/blink-dev/tLdAZCTlcAA/bYXVT8gYCAAJ

29 #include "wtf/Allocator.h"

30 #include "wtf/Compiler.h"

31 #include "wtf/HashTableDeletedValueType.h"

32 #include "wtf/WTFExport.h"

33 #include "wtf/text/ASCIIFastPath.h"

34 #include "wtf/text/StringImpl.h"

35 #include "wtf/text/StringView.h"

36 #include <algorithm>

37 #include <iosfwd>

38

39 #ifdef __OBJC__

40 #include <objc/objc.h>

41 #endif

42

43 namespace WTF {

44

45 class CString;

46 struct StringHash;

47

48 enum UTF8ConversionMode {

49 LenientUTF8Conversion,

50 StrictUTF8Conversion,

51 StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD

52 };

53

54 #define DISPATCH_CASE_OP(caseSensitivity, op, args) \

55 ((caseSensitivity == TextCaseSensitive) \

56 ? op args \

57 : (caseSensitivity == TextCaseASCIIInsensitive) \

58 ? op##IgnoringASCIICase args \

59 : op##IgnoringCase args)

60

61 // You can find documentation about this class in this doc:

62 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl14/edit?usp=sharing

63 class WTF_EXPORT String {

64 USING_FAST_MALLOC(String);

65

66 public:

67 // Construct a null string, distinguishable from an empty string.

68 String() {}

69

70 // Construct a string with UTF-16 data.

71 String(const UChar* characters, unsigned length);

72

73 // Construct a string by copying the contents of a vector.

74 // This method will never create a null string. Vectors with size() == 0

75 // will return the empty string.

76 // NOTE: This is different from String(vector.data(), vector.size())

77 // which will sometimes return a null string when vector.data() is null

78 // which can only occur for vectors without inline capacity.

79 // See: https://bugs.webkit.org/show_bug.cgi?id=109792

80 template <size_t inlineCapacity>

81 explicit String(const Vector<UChar, inlineCapacity>&);

82

83 // Construct a string with UTF-16 data, from a null-terminated source.

84 String(const UChar*);

85 String(const char16_t* chars)

86 : String(reinterpret_cast<const UChar*>(chars)) {}

87

88 // Construct a string with latin1 data.

89 String(const LChar* characters, unsigned length);

90 String(const char* characters, unsigned length);

91

92 // Construct a string with latin1 data, from a null-terminated source.

93 String(const LChar* characters)

94 : String(reinterpret_cast<const char*>(characters)) {}

95 String(const char* characters)

96 : String(characters, characters ? strlen(characters) : 0) {}

97

98 // Construct a string referencing an existing StringImpl.

99 String(StringImpl* impl) : m_impl(impl) {}

100 String(PassRefPtr<StringImpl> impl) : m_impl(std::move(impl)) {}

101

102 void swap(String& o) { m_impl.swap(o.m_impl); }

103

104 template <typename CharType>

105 static String adopt(StringBuffer<CharType>& buffer) {

106 if (!buffer.length())

107 return StringImpl::empty;

108 return String(buffer.release());

109 }

110

111 explicit operator bool() const { return !isNull(); }

112 bool isNull() const { return !m_impl; }

113 bool isEmpty() const { return !m_impl || !m_impl->length(); }

114

115 StringImpl* impl() const { return m_impl.get(); }

116 PassRefPtr<StringImpl> releaseImpl() { return m_impl.release(); }

117

118 unsigned length() const {

119 if (!m_impl)

120 return 0;

121 return m_impl->length();

122 }

123

124 const LChar* characters8() const {

125 if (!m_impl)

126 return 0;

127 DCHECK(m_impl->is8Bit());

128 return m_impl->characters8();

129 }

130

131 const UChar* characters16() const {

132 if (!m_impl)

133 return 0;

134 DCHECK(!m_impl->is8Bit());

135 return m_impl->characters16();

136 }

137

138 // Return characters8() or characters16() depending on CharacterType.

139 template <typename CharacterType>

140 inline const CharacterType* getCharacters() const;

141

142 bool is8Bit() const { return m_impl->is8Bit(); }

143

144 CString ascii() const;

145 CString latin1() const;

146 CString utf8(UTF8ConversionMode = LenientUTF8Conversion) const;

147

148 UChar operator[](unsigned index) const {

149 if (!m_impl || index >= m_impl->length())

150 return 0;

151 return (*m_impl)[index];

152 }

153

154 static String number(int);

155 static String number(unsigned);

156 static String number(long);

157 static String number(unsigned long);

158 static String number(long long);

159 static String number(unsigned long long);

160

161 static String number(double, unsigned precision = 6);

162

163 // Number to String conversion following the ECMAScript definition.

164 static String numberToStringECMAScript(double);

165 static String numberToStringFixedWidth(double, unsigned decimalPlaces);

166

167 // Find characters.

168 size_t find(UChar c, unsigned start = 0) const {

169 return m_impl ? m_impl->find(c, start) : kNotFound;

170 }

171 size_t find(LChar c, unsigned start = 0) const {

172 return m_impl ? m_impl->find(c, start) : kNotFound;

173 }

174 size_t find(char c, unsigned start = 0) const {

175 return find(static_cast<LChar>(c), start);

176 }

177 size_t find(CharacterMatchFunctionPtr matchFunction,

178 unsigned start = 0) const {

179 return m_impl ? m_impl->find(matchFunction, start) : kNotFound;

180 }

181

182 // Find substrings.

183 size_t find(const StringView& value,

184 unsigned start = 0,

185 TextCaseSensitivity caseSensitivity = TextCaseSensitive) const {

186 return m_impl

187 ? DISPATCH_CASE_OP(caseSensitivity, m_impl->find, (value, start))

188 : kNotFound;

189 }

190

191 // Unicode aware case insensitive string matching. Non-ASCII characters might

192 // match to ASCII characters. This function is rarely used to implement web

193 // platform features.

194 size_t findIgnoringCase(const StringView& value, unsigned start = 0) const {

195 return m_impl ? m_impl->findIgnoringCase(value, start) : kNotFound;

196 }

197

198 // ASCII case insensitive string matching.

199 size_t findIgnoringASCIICase(const StringView& value,

200 unsigned start = 0) const {

201 return m_impl ? m_impl->findIgnoringASCIICase(value, start) : kNotFound;

202 }

203

204 bool contains(char c) const { return find(c) != kNotFound; }

205 bool contains(const StringView& value,

206 TextCaseSensitivity caseSensitivity = TextCaseSensitive) const {

207 return find(value, 0, caseSensitivity) != kNotFound;

208 }

209

210 // Find the last instance of a single character or string.

211 size_t reverseFind(UChar c, unsigned start = UINT_MAX) const {

212 return m_impl ? m_impl->reverseFind(c, start) : kNotFound;

213 }

214 size_t reverseFind(const StringView& value, unsigned start = UINT_MAX) const {

215 return m_impl ? m_impl->reverseFind(value, start) : kNotFound;

216 }

217

218 UChar32 characterStartingAt(unsigned) const;

219

220 bool startsWith(

221 const StringView& prefix,

222 TextCaseSensitivity caseSensitivity = TextCaseSensitive) const {

223 return m_impl

224 ? DISPATCH_CASE_OP(caseSensitivity, m_impl->startsWith, (prefix))

225 : prefix.isEmpty();

226 }

227 bool startsWith(UChar character) const {

228 return m_impl ? m_impl->startsWith(character) : false;

229 }

230

231 bool endsWith(const StringView& suffix,

232 TextCaseSensitivity caseSensitivity = TextCaseSensitive) const {

233 return m_impl

234 ? DISPATCH_CASE_OP(caseSensitivity, m_impl->endsWith, (suffix))

235 : suffix.isEmpty();

236 }

237 bool endsWith(UChar character) const {

238 return m_impl ? m_impl->endsWith(character) : false;

239 }

240

241 void append(const StringView&);

242 void append(LChar);

243 void append(char c) { append(static_cast<LChar>(c)); }

244 void append(UChar);

245 void insert(const StringView&, unsigned pos);

246

247 // TODO(esprehn): replace strangely both modifies this String and returns a

248 // value. It should only do one of those.

249 String& replace(UChar pattern, UChar replacement) {

250 if (m_impl)

251 m_impl = m_impl->replace(pattern, replacement);

252 return *this;

253 }

254 String& replace(UChar pattern, const StringView& replacement) {

255 if (m_impl)

256 m_impl = m_impl->replace(pattern, replacement);

257 return *this;

258 }

259 String& replace(const StringView& pattern, const StringView& replacement) {

260 if (m_impl)

261 m_impl = m_impl->replace(pattern, replacement);

262 return *this;

263 }

264 String& replace(unsigned index,

265 unsigned lengthToReplace,

266 const StringView& replacement) {

267 if (m_impl)

268 m_impl = m_impl->replace(index, lengthToReplace, replacement);

269 return *this;

270 }

271

272 void fill(UChar c) {

273 if (m_impl)

274 m_impl = m_impl->fill(c);

275 }

276

277 void ensure16Bit();

278

279 void truncate(unsigned length);

280 void remove(unsigned start, unsigned length = 1);

281

282 String substring(unsigned pos, unsigned len = UINT_MAX) const;

283 String left(unsigned len) const { return substring(0, len); }

284 String right(unsigned len) const { return substring(length() - len, len); }

285

286 // Returns a lowercase/uppercase version of the string. These functions might

287 // convert non-ASCII characters to ASCII characters. For example, lower() for

288 // U+212A is 'k', upper() for U+017F is 'S'.

289 // These functions are rarely used to implement web platform features.

290 String lower() const;

291 String upper() const;

292

293 String lower(const AtomicString& localeIdentifier) const;

294 String upper(const AtomicString& localeIdentifier) const;

295

296 // Returns an uppercase version of the string.

297 // This function converts ASCII characters only.

298 String upperASCII() const;

299

300 String stripWhiteSpace() const;

301 String stripWhiteSpace(IsWhiteSpaceFunctionPtr) const;

302 String simplifyWhiteSpace(StripBehavior = StripExtraWhiteSpace) const;

303 String simplifyWhiteSpace(IsWhiteSpaceFunctionPtr,

304 StripBehavior = StripExtraWhiteSpace) const;

305

306 String removeCharacters(CharacterMatchFunctionPtr) const;

307 template <bool isSpecialCharacter(UChar)>

308 bool isAllSpecialCharacters() const;

309

310 // Return the string with case folded for case insensitive comparison.

311 String foldCase() const;

312

313 // Takes a printf format and args and prints into a String.

314 PRINTF_FORMAT(1, 2) static String format(const char* format, ...);

315

316 // Returns an uninitialized string. The characters need to be written

317 // into the buffer returned in data before the returned string is used.

318 // Failure to do this will have unpredictable results.

319 static String createUninitialized(unsigned length, UChar*& data) {

320 return StringImpl::createUninitialized(length, data);

321 }

322 static String createUninitialized(unsigned length, LChar*& data) {

323 return StringImpl::createUninitialized(length, data);

324 }

325

326 void split(const StringView& separator,

327 bool allowEmptyEntries,

328 Vector<String>& result) const;

329 void split(const StringView& separator, Vector<String>& result) const {

330 split(separator, false, result);

331 }

332 void split(UChar separator,

333 bool allowEmptyEntries,

334 Vector<String>& result) const;

335 void split(UChar separator, Vector<String>& result) const {

336 split(separator, false, result);

337 }

338

339 // Copy characters out of the string. See StringImpl.h for detailed docs.

340 unsigned copyTo(UChar* buffer, unsigned start, unsigned maxLength) const {

341 return m_impl ? m_impl->copyTo(buffer, start, maxLength) : 0;

342 }

343 template <typename BufferType>

344 void appendTo(BufferType&,

345 unsigned start = 0,

346 unsigned length = UINT_MAX) const;

347 template <typename BufferType>

348 void prependTo(BufferType&,

349 unsigned start = 0,

350 unsigned length = UINT_MAX) const;

351

352 // Convert the string into a number.

353

354 int toIntStrict(bool* ok = 0, int base = 10) const;

355 unsigned toUIntStrict(bool* ok = 0, int base = 10) const;

356 int64_t toInt64Strict(bool* ok = 0, int base = 10) const;

357 uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const;

358

359 int toInt(bool* ok = 0) const;

360 unsigned toUInt(bool* ok = 0) const;

361 int64_t toInt64(bool* ok = 0) const;

362 uint64_t toUInt64(bool* ok = 0) const;

363

364 // FIXME: Like the strict functions above, these give false for "ok" when

365 // there is trailing garbage. Like the non-strict functions above, these

366 // return the value when there is trailing garbage. It would be better if

367 // these were more consistent with the above functions instead.

368 double toDouble(bool* ok = 0) const;

369 float toFloat(bool* ok = 0) const;

370

371 String isolatedCopy() const;

372 bool isSafeToSendToAnotherThread() const;

373

374 #ifdef __OBJC__

375 String(NSString*);

376

377 // This conversion maps null string to "", which loses the meaning of null

378 // string, but we need this mapping because AppKit crashes when passed nil

379 // NSStrings.

380 operator NSString*() const {

381 if (!m_impl)

382 return @"";

383 return *m_impl;

384 }

385 #endif

386

387 static String make8BitFrom16BitSource(const UChar*, size_t);

388 template <size_t inlineCapacity>

389 static String make8BitFrom16BitSource(

390 const Vector<UChar, inlineCapacity>& buffer) {

391 return make8BitFrom16BitSource(buffer.data(), buffer.size());

392 }

393

394 static String make16BitFrom8BitSource(const LChar*, size_t);

395

396 // String::fromUTF8 will return a null string if

397 // the input data contains invalid UTF-8 sequences.

398 static String fromUTF8(const LChar*, size_t);

399 static String fromUTF8(const LChar*);

400 static String fromUTF8(const char* s, size_t length) {

401 return fromUTF8(reinterpret_cast<const LChar*>(s), length);

402 }

403 static String fromUTF8(const char* s) {

404 return fromUTF8(reinterpret_cast<const LChar*>(s));

405 }

406 static String fromUTF8(const CString&);

407

408 // Tries to convert the passed in string to UTF-8, but will fall back to

409 // Latin-1 if the string is not valid UTF-8.

410 static String fromUTF8WithLatin1Fallback(const LChar*, size_t);

411 static String fromUTF8WithLatin1Fallback(const char* s, size_t length) {

412 return fromUTF8WithLatin1Fallback(reinterpret_cast<const LChar*>(s),

413 length);

414 }

415

416 bool containsOnlyASCII() const {

417 return !m_impl || m_impl->containsOnlyASCII();

418 }

419 bool containsOnlyLatin1() const;

420 bool containsOnlyWhitespace() const {

421 return !m_impl || m_impl->containsOnlyWhitespace();

422 }

423

424 size_t charactersSizeInBytes() const {

425 return m_impl ? m_impl->charactersSizeInBytes() : 0;

426 }

427

428 // Hash table deleted values, which are only constructed and never copied or

429 // destroyed.

430 String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) {}

431 bool isHashTableDeletedValue() const {

432 return m_impl.isHashTableDeletedValue();

433 }

434

435 #ifndef NDEBUG

436 // For use in the debugger.

437 void show() const;

438 #endif

439

440 private:

441 template <typename CharacterType>

442 void appendInternal(CharacterType);

443

444 RefPtr<StringImpl> m_impl;

445 };

446

447 #undef DISPATCH_CASE_OP

448

449 inline bool operator==(const String& a, const String& b) {

450 // We don't use equalStringView here since we want the isAtomic() fast path

451 // inside WTF::equal.

452 return equal(a.impl(), b.impl());

453 }

454 inline bool operator==(const String& a, const char* b) {

455 return equalStringView(a, b);

456 }

457 inline bool operator==(const char* a, const String& b) {

458 return b == a;

459 }

460

461 inline bool operator!=(const String& a, const String& b) {

462 return !(a == b);

463 }

464 inline bool operator!=(const String& a, const char* b) {

465 return !(a == b);

466 }

467 inline bool operator!=(const char* a, const String& b) {

468 return !(a == b);

469 }

470

471 inline bool equalPossiblyIgnoringCase(const String& a,

472 const String& b,

473 bool ignoreCase) {

474 return ignoreCase ? equalIgnoringCase(a, b) : (a == b);

475 }

476

477 inline bool equalIgnoringNullity(const String& a, const String& b) {

478 return equalIgnoringNullity(a.impl(), b.impl());

479 }

480

481 template <size_t inlineCapacity>

482 inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a,

483 const String& b) {

484 return equalIgnoringNullity(a, b.impl());

485 }

486

487 inline void swap(String& a, String& b) {

488 a.swap(b);

489 }

490

491 // Definitions of string operations

492

493 template <size_t inlineCapacity>

494 String::String(const Vector<UChar, inlineCapacity>& vector)

495 : m_impl(vector.size() ? StringImpl::create(vector.data(), vector.size())

496 : StringImpl::empty) {}

497

498 template <>

499 inline const LChar* String::getCharacters<LChar>() const {

500 DCHECK(is8Bit());

501 return characters8();

502 }

503

504 template <>

505 inline const UChar* String::getCharacters<UChar>() const {

506 DCHECK(!is8Bit());

507 return characters16();

508 }

509

510 inline bool String::containsOnlyLatin1() const {

511 if (isEmpty())

512 return true;

513

514 if (is8Bit())

515 return true;

516

517 const UChar* characters = characters16();

518 UChar ored = 0;

519 for (size_t i = 0; i < m_impl->length(); ++i)

520 ored |= characters[i];

521 return !(ored & 0xFF00);

522 }

523

524 #ifdef __OBJC__

525 // This is for situations in WebKit where the long standing behavior has been

526 // "nil if empty", so we try to maintain longstanding behavior for the sake of

527 // entrenched clients

528 inline NSString* nsStringNilIfEmpty(const String& str) {

529 return str.isEmpty() ? nil : (NSString*)str;

530 }

531 #endif

532

533 WTF_EXPORT int codePointCompare(const String&, const String&);

534

535 inline bool codePointCompareLessThan(const String& a, const String& b) {

536 return codePointCompare(a.impl(), b.impl()) < 0;

537 }

538

539 WTF_EXPORT int codePointCompareIgnoringASCIICase(const String&, const char*);

540

541 template <bool isSpecialCharacter(UChar)>

542 inline bool String::isAllSpecialCharacters() const {

543 return StringView(*this).isAllSpecialCharacters<isSpecialCharacter>();

544 }

545

546 template <typename BufferType>

547 void String::appendTo(BufferType& result,

548 unsigned position,

549 unsigned length) const {

550 if (!m_impl)

551 return;

552 m_impl->appendTo(result, position, length);

553 }

554

555 template <typename BufferType>

556 void String::prependTo(BufferType& result,

557 unsigned position,

558 unsigned length) const {

559 if (!m_impl)

560 return;

561 m_impl->prependTo(result, position, length);

562 }

563

564 // StringHash is the default hash for String

565 template <typename T>

566 struct DefaultHash;

567 template <>

568 struct DefaultHash<String> {

569 typedef StringHash Hash;

570 };

571

572 // Shared global empty string.

573 WTF_EXPORT extern const String& emptyString;

574 WTF_EXPORT extern const String& emptyString16Bit;

575 WTF_EXPORT extern const String& xmlnsWithColon;

576

577 // Pretty printer for gtest and base/logging.*. It prepends and appends

578 // double-quotes, and escapes characters other than ASCII printables.

579 WTF_EXPORT std::ostream& operator<<(std::ostream&, const String&);

580

581 inline StringView::StringView(const String& string,

582 unsigned offset,

583 unsigned length)

584 : StringView(string.impl(), offset, length) {}

585 inline StringView::StringView(const String& string, unsigned offset)

586 : StringView(string.impl(), offset) {}

587 inline StringView::StringView(const String& string)

588 : StringView(string.impl()) {}

589

590 } // namespace WTF

591

592 WTF_ALLOW_MOVE_AND_INIT_WITH_MEM_FUNCTIONS(String);

593

594 using WTF::CString;

595 using WTF::StrictUTF8Conversion;

596 using WTF::StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD;

597 using WTF::String;

598 using WTF::emptyString;

599 using WTF::emptyString16Bit;

600 using WTF::charactersAreAllASCII;

601 using WTF::equal;

602 using WTF::find;

603 using WTF::isSpaceOrNewline;

604

605 #include "wtf/text/AtomicString.h"

606 #endif // WTFString_h

OLD	NEW

« no previous file with comments | « third_party/WebKit/Source/wtf/text/UTF8.cpp ('k') | third_party/WebKit/Source/wtf/text/WTFString.cpp » ('j') | no next file with comments »