Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(165)

Side by Side Diff: third_party/WebKit/Source/wtf/text/WTFString.h

Issue 2764243002: Move files in wtf/ to platform/wtf/ (Part 9). (Closed)
Patch Set: Rebase. Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 * (C) 1999 Lars Knoll (knoll@kde.org) 2 // Use of this source code is governed by a BSD-style license that can be
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012, 2013 Apple Inc. 3 // found in the LICENSE file.
4 * All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22 4
23 #ifndef WTFString_h 5 #include "platform/wtf/text/WTFString.h"
24 #define WTFString_h
25 6
26 // This file would be called String.h, but that conflicts with <string.h> 7 // The contents of this header were moved to platform/wtf as part of
27 // on systems without case-sensitive file systems. 8 // the WTF migration project. See the following post for details:
28 9 // https://groups.google.com/a/chromium.org/d/msg/blink-dev/tLdAZCTlcAA/bYXVT8gYCAAJ
29 #include "wtf/Allocator.h"
30 #include "wtf/Compiler.h"
31 #include "wtf/HashTableDeletedValueType.h"
32 #include "wtf/WTFExport.h"
33 #include "wtf/text/ASCIIFastPath.h"
34 #include "wtf/text/StringImpl.h"
35 #include "wtf/text/StringView.h"
36 #include <algorithm>
37 #include <iosfwd>
38
39 #ifdef __OBJC__
40 #include <objc/objc.h>
41 #endif
42
43 namespace WTF {
44
45 class CString;
46 struct StringHash;
47
48 enum UTF8ConversionMode {
49 LenientUTF8Conversion,
50 StrictUTF8Conversion,
51 StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD
52 };
53
54 #define DISPATCH_CASE_OP(caseSensitivity, op, args) \
55 ((caseSensitivity == TextCaseSensitive) \
56 ? op args \
57 : (caseSensitivity == TextCaseASCIIInsensitive) \
58 ? op##IgnoringASCIICase args \
59 : op##IgnoringCase args)
60
61 // You can find documentation about this class in this doc:
62 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl14/edit?usp=sharing
63 class WTF_EXPORT String {
64 USING_FAST_MALLOC(String);
65
66 public:
67 // Construct a null string, distinguishable from an empty string.
68 String() {}
69
70 // Construct a string with UTF-16 data.
71 String(const UChar* characters, unsigned length);
72
73 // Construct a string by copying the contents of a vector.
74 // This method will never create a null string. Vectors with size() == 0
75 // will return the empty string.
76 // NOTE: This is different from String(vector.data(), vector.size())
77 // which will sometimes return a null string when vector.data() is null
78 // which can only occur for vectors without inline capacity.
79 // See: https://bugs.webkit.org/show_bug.cgi?id=109792
80 template <size_t inlineCapacity>
81 explicit String(const Vector<UChar, inlineCapacity>&);
82
83 // Construct a string with UTF-16 data, from a null-terminated source.
84 String(const UChar*);
85 String(const char16_t* chars)
86 : String(reinterpret_cast<const UChar*>(chars)) {}
87
88 // Construct a string with latin1 data.
89 String(const LChar* characters, unsigned length);
90 String(const char* characters, unsigned length);
91
92 // Construct a string with latin1 data, from a null-terminated source.
93 String(const LChar* characters)
94 : String(reinterpret_cast<const char*>(characters)) {}
95 String(const char* characters)
96 : String(characters, characters ? strlen(characters) : 0) {}
97
98 // Construct a string referencing an existing StringImpl.
99 String(StringImpl* impl) : m_impl(impl) {}
100 String(PassRefPtr<StringImpl> impl) : m_impl(std::move(impl)) {}
101
102 void swap(String& o) { m_impl.swap(o.m_impl); }
103
104 template <typename CharType>
105 static String adopt(StringBuffer<CharType>& buffer) {
106 if (!buffer.length())
107 return StringImpl::empty;
108 return String(buffer.release());
109 }
110
111 explicit operator bool() const { return !isNull(); }
112 bool isNull() const { return !m_impl; }
113 bool isEmpty() const { return !m_impl || !m_impl->length(); }
114
115 StringImpl* impl() const { return m_impl.get(); }
116 PassRefPtr<StringImpl> releaseImpl() { return m_impl.release(); }
117
118 unsigned length() const {
119 if (!m_impl)
120 return 0;
121 return m_impl->length();
122 }
123
124 const LChar* characters8() const {
125 if (!m_impl)
126 return 0;
127 DCHECK(m_impl->is8Bit());
128 return m_impl->characters8();
129 }
130
131 const UChar* characters16() const {
132 if (!m_impl)
133 return 0;
134 DCHECK(!m_impl->is8Bit());
135 return m_impl->characters16();
136 }
137
138 // Return characters8() or characters16() depending on CharacterType.
139 template <typename CharacterType>
140 inline const CharacterType* getCharacters() const;
141
142 bool is8Bit() const { return m_impl->is8Bit(); }
143
144 CString ascii() const;
145 CString latin1() const;
146 CString utf8(UTF8ConversionMode = LenientUTF8Conversion) const;
147
148 UChar operator[](unsigned index) const {
149 if (!m_impl || index >= m_impl->length())
150 return 0;
151 return (*m_impl)[index];
152 }
153
154 static String number(int);
155 static String number(unsigned);
156 static String number(long);
157 static String number(unsigned long);
158 static String number(long long);
159 static String number(unsigned long long);
160
161 static String number(double, unsigned precision = 6);
162
163 // Number to String conversion following the ECMAScript definition.
164 static String numberToStringECMAScript(double);
165 static String numberToStringFixedWidth(double, unsigned decimalPlaces);
166
167 // Find characters.
168 size_t find(UChar c, unsigned start = 0) const {
169 return m_impl ? m_impl->find(c, start) : kNotFound;
170 }
171 size_t find(LChar c, unsigned start = 0) const {
172 return m_impl ? m_impl->find(c, start) : kNotFound;
173 }
174 size_t find(char c, unsigned start = 0) const {
175 return find(static_cast<LChar>(c), start);
176 }
177 size_t find(CharacterMatchFunctionPtr matchFunction,
178 unsigned start = 0) const {
179 return m_impl ? m_impl->find(matchFunction, start) : kNotFound;
180 }
181
182 // Find substrings.
183 size_t find(const StringView& value,
184 unsigned start = 0,
185 TextCaseSensitivity caseSensitivity = TextCaseSensitive) const {
186 return m_impl
187 ? DISPATCH_CASE_OP(caseSensitivity, m_impl->find, (value, start))
188 : kNotFound;
189 }
190
191 // Unicode aware case insensitive string matching. Non-ASCII characters might
192 // match to ASCII characters. This function is rarely used to implement web
193 // platform features.
194 size_t findIgnoringCase(const StringView& value, unsigned start = 0) const {
195 return m_impl ? m_impl->findIgnoringCase(value, start) : kNotFound;
196 }
197
198 // ASCII case insensitive string matching.
199 size_t findIgnoringASCIICase(const StringView& value,
200 unsigned start = 0) const {
201 return m_impl ? m_impl->findIgnoringASCIICase(value, start) : kNotFound;
202 }
203
204 bool contains(char c) const { return find(c) != kNotFound; }
205 bool contains(const StringView& value,
206 TextCaseSensitivity caseSensitivity = TextCaseSensitive) const {
207 return find(value, 0, caseSensitivity) != kNotFound;
208 }
209
210 // Find the last instance of a single character or string.
211 size_t reverseFind(UChar c, unsigned start = UINT_MAX) const {
212 return m_impl ? m_impl->reverseFind(c, start) : kNotFound;
213 }
214 size_t reverseFind(const StringView& value, unsigned start = UINT_MAX) const {
215 return m_impl ? m_impl->reverseFind(value, start) : kNotFound;
216 }
217
218 UChar32 characterStartingAt(unsigned) const;
219
220 bool startsWith(
221 const StringView& prefix,
222 TextCaseSensitivity caseSensitivity = TextCaseSensitive) const {
223 return m_impl
224 ? DISPATCH_CASE_OP(caseSensitivity, m_impl->startsWith, (prefix))
225 : prefix.isEmpty();
226 }
227 bool startsWith(UChar character) const {
228 return m_impl ? m_impl->startsWith(character) : false;
229 }
230
231 bool endsWith(const StringView& suffix,
232 TextCaseSensitivity caseSensitivity = TextCaseSensitive) const {
233 return m_impl
234 ? DISPATCH_CASE_OP(caseSensitivity, m_impl->endsWith, (suffix))
235 : suffix.isEmpty();
236 }
237 bool endsWith(UChar character) const {
238 return m_impl ? m_impl->endsWith(character) : false;
239 }
240
241 void append(const StringView&);
242 void append(LChar);
243 void append(char c) { append(static_cast<LChar>(c)); }
244 void append(UChar);
245 void insert(const StringView&, unsigned pos);
246
247 // TODO(esprehn): replace strangely both modifies this String *and* returns a
248 // value. It should only do one of those.
249 String& replace(UChar pattern, UChar replacement) {
250 if (m_impl)
251 m_impl = m_impl->replace(pattern, replacement);
252 return *this;
253 }
254 String& replace(UChar pattern, const StringView& replacement) {
255 if (m_impl)
256 m_impl = m_impl->replace(pattern, replacement);
257 return *this;
258 }
259 String& replace(const StringView& pattern, const StringView& replacement) {
260 if (m_impl)
261 m_impl = m_impl->replace(pattern, replacement);
262 return *this;
263 }
264 String& replace(unsigned index,
265 unsigned lengthToReplace,
266 const StringView& replacement) {
267 if (m_impl)
268 m_impl = m_impl->replace(index, lengthToReplace, replacement);
269 return *this;
270 }
271
272 void fill(UChar c) {
273 if (m_impl)
274 m_impl = m_impl->fill(c);
275 }
276
277 void ensure16Bit();
278
279 void truncate(unsigned length);
280 void remove(unsigned start, unsigned length = 1);
281
282 String substring(unsigned pos, unsigned len = UINT_MAX) const;
283 String left(unsigned len) const { return substring(0, len); }
284 String right(unsigned len) const { return substring(length() - len, len); }
285
286 // Returns a lowercase/uppercase version of the string. These functions might
287 // convert non-ASCII characters to ASCII characters. For example, lower() for
288 // U+212A is 'k', upper() for U+017F is 'S'.
289 // These functions are rarely used to implement web platform features.
290 String lower() const;
291 String upper() const;
292
293 String lower(const AtomicString& localeIdentifier) const;
294 String upper(const AtomicString& localeIdentifier) const;
295
296 // Returns an uppercase version of the string.
297 // This function converts ASCII characters only.
298 String upperASCII() const;
299
300 String stripWhiteSpace() const;
301 String stripWhiteSpace(IsWhiteSpaceFunctionPtr) const;
302 String simplifyWhiteSpace(StripBehavior = StripExtraWhiteSpace) const;
303 String simplifyWhiteSpace(IsWhiteSpaceFunctionPtr,
304 StripBehavior = StripExtraWhiteSpace) const;
305
306 String removeCharacters(CharacterMatchFunctionPtr) const;
307 template <bool isSpecialCharacter(UChar)>
308 bool isAllSpecialCharacters() const;
309
310 // Return the string with case folded for case insensitive comparison.
311 String foldCase() const;
312
313 // Takes a printf format and args and prints into a String.
314 PRINTF_FORMAT(1, 2) static String format(const char* format, ...);
315
316 // Returns an uninitialized string. The characters need to be written
317 // into the buffer returned in data before the returned string is used.
318 // Failure to do this will have unpredictable results.
319 static String createUninitialized(unsigned length, UChar*& data) {
320 return StringImpl::createUninitialized(length, data);
321 }
322 static String createUninitialized(unsigned length, LChar*& data) {
323 return StringImpl::createUninitialized(length, data);
324 }
325
326 void split(const StringView& separator,
327 bool allowEmptyEntries,
328 Vector<String>& result) const;
329 void split(const StringView& separator, Vector<String>& result) const {
330 split(separator, false, result);
331 }
332 void split(UChar separator,
333 bool allowEmptyEntries,
334 Vector<String>& result) const;
335 void split(UChar separator, Vector<String>& result) const {
336 split(separator, false, result);
337 }
338
339 // Copy characters out of the string. See StringImpl.h for detailed docs.
340 unsigned copyTo(UChar* buffer, unsigned start, unsigned maxLength) const {
341 return m_impl ? m_impl->copyTo(buffer, start, maxLength) : 0;
342 }
343 template <typename BufferType>
344 void appendTo(BufferType&,
345 unsigned start = 0,
346 unsigned length = UINT_MAX) const;
347 template <typename BufferType>
348 void prependTo(BufferType&,
349 unsigned start = 0,
350 unsigned length = UINT_MAX) const;
351
352 // Convert the string into a number.
353
354 int toIntStrict(bool* ok = 0, int base = 10) const;
355 unsigned toUIntStrict(bool* ok = 0, int base = 10) const;
356 int64_t toInt64Strict(bool* ok = 0, int base = 10) const;
357 uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const;
358
359 int toInt(bool* ok = 0) const;
360 unsigned toUInt(bool* ok = 0) const;
361 int64_t toInt64(bool* ok = 0) const;
362 uint64_t toUInt64(bool* ok = 0) const;
363
364 // FIXME: Like the strict functions above, these give false for "ok" when
365 // there is trailing garbage. Like the non-strict functions above, these
366 // return the value when there is trailing garbage. It would be better if
367 // these were more consistent with the above functions instead.
368 double toDouble(bool* ok = 0) const;
369 float toFloat(bool* ok = 0) const;
370
371 String isolatedCopy() const;
372 bool isSafeToSendToAnotherThread() const;
373
374 #ifdef __OBJC__
375 String(NSString*);
376
377 // This conversion maps null string to "", which loses the meaning of null
378 // string, but we need this mapping because AppKit crashes when passed nil
379 // NSStrings.
380 operator NSString*() const {
381 if (!m_impl)
382 return @"";
383 return *m_impl;
384 }
385 #endif
386
387 static String make8BitFrom16BitSource(const UChar*, size_t);
388 template <size_t inlineCapacity>
389 static String make8BitFrom16BitSource(
390 const Vector<UChar, inlineCapacity>& buffer) {
391 return make8BitFrom16BitSource(buffer.data(), buffer.size());
392 }
393
394 static String make16BitFrom8BitSource(const LChar*, size_t);
395
396 // String::fromUTF8 will return a null string if
397 // the input data contains invalid UTF-8 sequences.
398 static String fromUTF8(const LChar*, size_t);
399 static String fromUTF8(const LChar*);
400 static String fromUTF8(const char* s, size_t length) {
401 return fromUTF8(reinterpret_cast<const LChar*>(s), length);
402 }
403 static String fromUTF8(const char* s) {
404 return fromUTF8(reinterpret_cast<const LChar*>(s));
405 }
406 static String fromUTF8(const CString&);
407
408 // Tries to convert the passed in string to UTF-8, but will fall back to
409 // Latin-1 if the string is not valid UTF-8.
410 static String fromUTF8WithLatin1Fallback(const LChar*, size_t);
411 static String fromUTF8WithLatin1Fallback(const char* s, size_t length) {
412 return fromUTF8WithLatin1Fallback(reinterpret_cast<const LChar*>(s),
413 length);
414 }
415
416 bool containsOnlyASCII() const {
417 return !m_impl || m_impl->containsOnlyASCII();
418 }
419 bool containsOnlyLatin1() const;
420 bool containsOnlyWhitespace() const {
421 return !m_impl || m_impl->containsOnlyWhitespace();
422 }
423
424 size_t charactersSizeInBytes() const {
425 return m_impl ? m_impl->charactersSizeInBytes() : 0;
426 }
427
428 // Hash table deleted values, which are only constructed and never copied or
429 // destroyed.
430 String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) {}
431 bool isHashTableDeletedValue() const {
432 return m_impl.isHashTableDeletedValue();
433 }
434
435 #ifndef NDEBUG
436 // For use in the debugger.
437 void show() const;
438 #endif
439
440 private:
441 template <typename CharacterType>
442 void appendInternal(CharacterType);
443
444 RefPtr<StringImpl> m_impl;
445 };
446
447 #undef DISPATCH_CASE_OP
448
449 inline bool operator==(const String& a, const String& b) {
450 // We don't use equalStringView here since we want the isAtomic() fast path
451 // inside WTF::equal.
452 return equal(a.impl(), b.impl());
453 }
454 inline bool operator==(const String& a, const char* b) {
455 return equalStringView(a, b);
456 }
457 inline bool operator==(const char* a, const String& b) {
458 return b == a;
459 }
460
461 inline bool operator!=(const String& a, const String& b) {
462 return !(a == b);
463 }
464 inline bool operator!=(const String& a, const char* b) {
465 return !(a == b);
466 }
467 inline bool operator!=(const char* a, const String& b) {
468 return !(a == b);
469 }
470
471 inline bool equalPossiblyIgnoringCase(const String& a,
472 const String& b,
473 bool ignoreCase) {
474 return ignoreCase ? equalIgnoringCase(a, b) : (a == b);
475 }
476
477 inline bool equalIgnoringNullity(const String& a, const String& b) {
478 return equalIgnoringNullity(a.impl(), b.impl());
479 }
480
481 template <size_t inlineCapacity>
482 inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a,
483 const String& b) {
484 return equalIgnoringNullity(a, b.impl());
485 }
486
487 inline void swap(String& a, String& b) {
488 a.swap(b);
489 }
490
491 // Definitions of string operations
492
493 template <size_t inlineCapacity>
494 String::String(const Vector<UChar, inlineCapacity>& vector)
495 : m_impl(vector.size() ? StringImpl::create(vector.data(), vector.size())
496 : StringImpl::empty) {}
497
498 template <>
499 inline const LChar* String::getCharacters<LChar>() const {
500 DCHECK(is8Bit());
501 return characters8();
502 }
503
504 template <>
505 inline const UChar* String::getCharacters<UChar>() const {
506 DCHECK(!is8Bit());
507 return characters16();
508 }
509
510 inline bool String::containsOnlyLatin1() const {
511 if (isEmpty())
512 return true;
513
514 if (is8Bit())
515 return true;
516
517 const UChar* characters = characters16();
518 UChar ored = 0;
519 for (size_t i = 0; i < m_impl->length(); ++i)
520 ored |= characters[i];
521 return !(ored & 0xFF00);
522 }
523
524 #ifdef __OBJC__
525 // This is for situations in WebKit where the long standing behavior has been
526 // "nil if empty", so we try to maintain longstanding behavior for the sake of
527 // entrenched clients
528 inline NSString* nsStringNilIfEmpty(const String& str) {
529 return str.isEmpty() ? nil : (NSString*)str;
530 }
531 #endif
532
533 WTF_EXPORT int codePointCompare(const String&, const String&);
534
535 inline bool codePointCompareLessThan(const String& a, const String& b) {
536 return codePointCompare(a.impl(), b.impl()) < 0;
537 }
538
539 WTF_EXPORT int codePointCompareIgnoringASCIICase(const String&, const char*);
540
541 template <bool isSpecialCharacter(UChar)>
542 inline bool String::isAllSpecialCharacters() const {
543 return StringView(*this).isAllSpecialCharacters<isSpecialCharacter>();
544 }
545
546 template <typename BufferType>
547 void String::appendTo(BufferType& result,
548 unsigned position,
549 unsigned length) const {
550 if (!m_impl)
551 return;
552 m_impl->appendTo(result, position, length);
553 }
554
555 template <typename BufferType>
556 void String::prependTo(BufferType& result,
557 unsigned position,
558 unsigned length) const {
559 if (!m_impl)
560 return;
561 m_impl->prependTo(result, position, length);
562 }
563
564 // StringHash is the default hash for String
565 template <typename T>
566 struct DefaultHash;
567 template <>
568 struct DefaultHash<String> {
569 typedef StringHash Hash;
570 };
571
572 // Shared global empty string.
573 WTF_EXPORT extern const String& emptyString;
574 WTF_EXPORT extern const String& emptyString16Bit;
575 WTF_EXPORT extern const String& xmlnsWithColon;
576
577 // Pretty printer for gtest and base/logging.*. It prepends and appends
578 // double-quotes, and escapes characters other than ASCII printables.
579 WTF_EXPORT std::ostream& operator<<(std::ostream&, const String&);
580
581 inline StringView::StringView(const String& string,
582 unsigned offset,
583 unsigned length)
584 : StringView(string.impl(), offset, length) {}
585 inline StringView::StringView(const String& string, unsigned offset)
586 : StringView(string.impl(), offset) {}
587 inline StringView::StringView(const String& string)
588 : StringView(string.impl()) {}
589
590 } // namespace WTF
591
592 WTF_ALLOW_MOVE_AND_INIT_WITH_MEM_FUNCTIONS(String);
593
594 using WTF::CString;
595 using WTF::StrictUTF8Conversion;
596 using WTF::StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD;
597 using WTF::String;
598 using WTF::emptyString;
599 using WTF::emptyString16Bit;
600 using WTF::charactersAreAllASCII;
601 using WTF::equal;
602 using WTF::find;
603 using WTF::isSpaceOrNewline;
604
605 #include "wtf/text/AtomicString.h"
606 #endif // WTFString_h
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/text/UTF8.cpp ('k') | third_party/WebKit/Source/wtf/text/WTFString.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698