Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(28)

Side by Side Diff: third_party/WebKit/Source/wtf/text/StringImpl.h

Issue 2764243002: Move files in wtf/ to platform/wtf/ (Part 9). (Closed)
Patch Set: Rebase. Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) 2 // Use of this source code is governed by a BSD-style license that can be
3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights 3 // found in the LICENSE file.
4 * reserved.
5 * Copyright (C) 2009 Google Inc. All rights reserved.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23 4
24 #ifndef StringImpl_h 5 #include "platform/wtf/text/StringImpl.h"
25 #define StringImpl_h
26 6
27 #include "wtf/ASCIICType.h" 7 // The contents of this header was moved to platform/wtf as part of
28 #include "wtf/Forward.h" 8 // WTF migration project. See the following post for details:
29 #include "wtf/HashMap.h" 9 // https://groups.google.com/a/chromium.org/d/msg/blink-dev/tLdAZCTlcAA/bYXVT8gYCAAJ
30 #include "wtf/StringHasher.h"
31 #include "wtf/Vector.h"
32 #include "wtf/WTFExport.h"
33 #include "wtf/text/ASCIIFastPath.h"
34 #include "wtf/text/Unicode.h"
35 #include <limits.h>
36 #include <string.h>
37
38 #if DCHECK_IS_ON()
39 #include "wtf/ThreadRestrictionVerifier.h"
40 #endif
41
42 #if OS(MACOSX)
43 typedef const struct __CFString* CFStringRef;
44 #endif
45
46 #ifdef __OBJC__
47 @class NSString;
48 #endif
49
50 namespace WTF {
51
52 struct AlreadyHashed;
53 template <typename>
54 class RetainPtr;
55
// Selects how letter case is treated when two strings are matched.
enum TextCaseSensitivity {
  TextCaseSensitive = 0,
  TextCaseASCIIInsensitive = 1,

  // Unicode-aware case-insensitive matching, in which non-ASCII characters
  // might match ASCII characters. Rarely the right choice when implementing
  // web platform features.
  TextCaseUnicodeInsensitive = 2
};
65
66 enum StripBehavior { StripExtraWhiteSpace, DoNotStripWhiteSpace };
67
68 typedef bool (*CharacterMatchFunctionPtr)(UChar);
69 typedef bool (*IsWhiteSpaceFunctionPtr)(UChar);
70 typedef HashMap<unsigned, StringImpl*, AlreadyHashed> StaticStringsTable;
71
72 // Define STRING_STATS to turn on run time statistics of string sizes and memory
73 // usage
74 #undef STRING_STATS
75
#ifdef STRING_STATS
// Running counters of how many strings exist and how many characters they
// hold, split by character width.
struct StringStats {
  // Accounts for one more 8-bit string holding |length| characters.
  inline void add8BitString(unsigned length) {
    ++m_totalNumberStrings;
    ++m_number8BitStrings;
    m_total8BitData += length;
  }

  // Accounts for one more 16-bit string holding |length| characters.
  inline void add16BitString(unsigned length) {
    ++m_totalNumberStrings;
    ++m_number16BitStrings;
    m_total16BitData += length;
  }

  void removeString(StringImpl*);
  void printStats();

  static const unsigned s_printStringStatsFrequency = 5000;
  static unsigned s_stringRemovesTillPrintStats;

  unsigned m_totalNumberStrings;
  unsigned m_number8BitStrings;
  unsigned m_number16BitStrings;
  unsigned long long m_total8BitData;
  unsigned long long m_total16BitData;
};

void addStringForStats(StringImpl*);
void removeStringForStats(StringImpl*);

// Each macro expands to exactly one statement (do { ... } while (0)) so that
// it stays correct when used as the body of an unbraced if/else or loop; a
// bare pair of statements would leave the second one outside the branch.
// The ADD macros must be used inside a StringImpl member, because they
// register |this|.
#define STRING_STATS_ADD_8BIT_STRING(length)         \
  do {                                               \
    StringImpl::stringStats().add8BitString(length); \
    addStringForStats(this);                         \
  } while (0)
#define STRING_STATS_ADD_16BIT_STRING(length)         \
  do {                                                \
    StringImpl::stringStats().add16BitString(length); \
    addStringForStats(this);                          \
  } while (0)
// Both bookkeeping calls act on the macro argument, so the counters and the
// tracked set cannot disagree about which string was removed.
#define STRING_STATS_REMOVE_STRING(string)          \
  do {                                              \
    StringImpl::stringStats().removeString(string); \
    removeStringForStats(string);                   \
  } while (0)
#else
// Statistics are compiled out: the macros evaluate to a no-op expression.
#define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_16BIT_STRING(length) ((void)0)
#define STRING_STATS_REMOVE_STRING(string) ((void)0)
#endif
120
121 // You can find documentation about this class in this doc:
122 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl14/edit?usp=sharing
123 class WTF_EXPORT StringImpl {
124 WTF_MAKE_NONCOPYABLE(StringImpl);
125
126 private:
127 // StringImpls are allocated out of the WTF buffer partition.
128 void* operator new(size_t);
129 void* operator new(size_t, void* ptr) { return ptr; }
130 void operator delete(void*);
131
132 // Used to construct static strings, which have a special refCount that can
133 // never hit zero. This means that the static string will never be
134 // destroyed, which is important because static strings will be shared
135 // across threads & ref-counted in a non-threadsafe manner.
136 enum ConstructEmptyStringTag { ConstructEmptyString };
137 explicit StringImpl(ConstructEmptyStringTag)
138 : m_refCount(1),
139 m_length(0),
140 m_hash(0),
141 m_containsOnlyASCII(true),
142 m_needsASCIICheck(false),
143 m_isAtomic(false),
144 m_is8Bit(true),
145 m_isStatic(true) {
146 // Ensure that the hash is computed so that AtomicStringHash can call
147 // existingHash() with impunity. The empty string is special because it
148 // is never entered into AtomicString's HashKey, but still needs to
149 // compare correctly.
150 STRING_STATS_ADD_8BIT_STRING(m_length);
151 hash();
152 }
153
154 enum ConstructEmptyString16BitTag { ConstructEmptyString16Bit };
155 explicit StringImpl(ConstructEmptyString16BitTag)
156 : m_refCount(1),
157 m_length(0),
158 m_hash(0),
159 m_containsOnlyASCII(true),
160 m_needsASCIICheck(false),
161 m_isAtomic(false),
162 m_is8Bit(false),
163 m_isStatic(true) {
164 STRING_STATS_ADD_16BIT_STRING(m_length);
165 hash();
166 }
167
168 // FIXME: there has to be a less hacky way to do this.
169 enum Force8Bit { Force8BitConstructor };
170 StringImpl(unsigned length, Force8Bit)
171 : m_refCount(1),
172 m_length(length),
173 m_hash(0),
174 m_containsOnlyASCII(!length),
175 m_needsASCIICheck(static_cast<bool>(length)),
176 m_isAtomic(false),
177 m_is8Bit(true),
178 m_isStatic(false) {
179 DCHECK(m_length);
180 STRING_STATS_ADD_8BIT_STRING(m_length);
181 }
182
183 StringImpl(unsigned length)
184 : m_refCount(1),
185 m_length(length),
186 m_hash(0),
187 m_containsOnlyASCII(!length),
188 m_needsASCIICheck(static_cast<bool>(length)),
189 m_isAtomic(false),
190 m_is8Bit(false),
191 m_isStatic(false) {
192 DCHECK(m_length);
193 STRING_STATS_ADD_16BIT_STRING(m_length);
194 }
195
196 enum StaticStringTag { StaticString };
197 StringImpl(unsigned length, unsigned hash, StaticStringTag)
198 : m_refCount(1),
199 m_length(length),
200 m_hash(hash),
201 m_containsOnlyASCII(!length),
202 m_needsASCIICheck(static_cast<bool>(length)),
203 m_isAtomic(false),
204 m_is8Bit(true),
205 m_isStatic(true) {}
206
207 public:
208 static StringImpl* empty;
209 static StringImpl* empty16Bit;
210
211 ~StringImpl();
212
213 static void initStatics();
214
215 static StringImpl* createStatic(const char* string,
216 unsigned length,
217 unsigned hash);
218 static void reserveStaticStringsCapacityForSize(unsigned size);
219 static void freezeStaticStrings();
220 static const StaticStringsTable& allStaticStrings();
221 static unsigned highestStaticStringLength() {
222 return m_highestStaticStringLength;
223 }
224
225 static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
226 static PassRefPtr<StringImpl> create(const LChar*, unsigned length);
227 static PassRefPtr<StringImpl> create8BitIfPossible(const UChar*,
228 unsigned length);
229 template <size_t inlineCapacity>
230 static PassRefPtr<StringImpl> create8BitIfPossible(
231 const Vector<UChar, inlineCapacity>& vector) {
232 return create8BitIfPossible(vector.data(), vector.size());
233 }
234
235 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s,
236 unsigned length) {
237 return create(reinterpret_cast<const LChar*>(s), length);
238 }
239 static PassRefPtr<StringImpl> create(const LChar*);
240 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s) {
241 return create(reinterpret_cast<const LChar*>(s));
242 }
243
244 static PassRefPtr<StringImpl> createUninitialized(unsigned length,
245 LChar*& data);
246 static PassRefPtr<StringImpl> createUninitialized(unsigned length,
247 UChar*& data);
248
249 unsigned length() const { return m_length; }
250 bool is8Bit() const { return m_is8Bit; }
251
252 ALWAYS_INLINE const LChar* characters8() const {
253 DCHECK(is8Bit());
254 return reinterpret_cast<const LChar*>(this + 1);
255 }
256 ALWAYS_INLINE const UChar* characters16() const {
257 DCHECK(!is8Bit());
258 return reinterpret_cast<const UChar*>(this + 1);
259 }
260 ALWAYS_INLINE const void* bytes() const {
261 return reinterpret_cast<const void*>(this + 1);
262 }
263
264 template <typename CharType>
265 ALWAYS_INLINE const CharType* getCharacters() const;
266
267 size_t charactersSizeInBytes() const {
268 return length() * (is8Bit() ? sizeof(LChar) : sizeof(UChar));
269 }
270
271 bool isAtomic() const { return m_isAtomic; }
272 void setIsAtomic(bool isAtomic) { m_isAtomic = isAtomic; }
273
274 bool isStatic() const { return m_isStatic; }
275
276 bool containsOnlyASCII() const;
277
278 bool isSafeToSendToAnotherThread() const;
279
280 // The high bits of 'hash' are always empty, but we prefer to store our
281 // flags in the low bits because it makes them slightly more efficient to
282 // access. So, we shift left and right when setting and getting our hash
283 // code.
284 void setHash(unsigned hash) const {
285 DCHECK(!hasHash());
286 // Multiple clients assume that StringHasher is the canonical string
287 // hash function.
288 DCHECK(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(
289 characters8(), m_length)
290 : StringHasher::computeHashAndMaskTop8Bits(
291 characters16(), m_length)));
292 m_hash = hash;
293 DCHECK(hash); // Verify that 0 is a valid sentinel hash value.
294 }
295
296 bool hasHash() const { return m_hash != 0; }
297
298 unsigned existingHash() const {
299 DCHECK(hasHash());
300 return m_hash;
301 }
302
303 unsigned hash() const {
304 if (hasHash())
305 return existingHash();
306 return hashSlowCase();
307 }
308
309 ALWAYS_INLINE bool hasOneRef() const {
310 #if DCHECK_IS_ON()
311 DCHECK(isStatic() || m_verifier.isSafeToUse()) << asciiForDebugging();
312 #endif
313 return m_refCount == 1;
314 }
315
316 ALWAYS_INLINE void ref() const {
317 #if DCHECK_IS_ON()
318 DCHECK(isStatic() || m_verifier.onRef(m_refCount)) << asciiForDebugging();
319 #endif
320 ++m_refCount;
321 }
322
323 ALWAYS_INLINE void deref() const {
324 #if DCHECK_IS_ON()
325 DCHECK(isStatic() || m_verifier.onDeref(m_refCount))
326 << asciiForDebugging() << " " << currentThread();
327 #endif
328 if (!--m_refCount)
329 destroyIfNotStatic();
330 }
331
// FIXME: Does this really belong in StringImpl?
// Copies |numCharacters| elements of type T from |source| to |destination|
// with a single memcpy; the two ranges are expected not to overlap.
template <typename T>
static void copyChars(T* destination,
                      const T* source,
                      unsigned numCharacters) {
  const size_t byteCount = numCharacters * sizeof(T);
  memcpy(destination, source, byteCount);
}
339
340 ALWAYS_INLINE static void copyChars(UChar* destination,
341 const LChar* source,
342 unsigned numCharacters) {
343 for (unsigned i = 0; i < numCharacters; ++i)
344 destination[i] = source[i];
345 }
346
347 // Some string features, like refcounting and the atomicity flag, are not
348 // thread-safe. We achieve thread safety by isolation, giving each thread
349 // its own copy of the string.
350 PassRefPtr<StringImpl> isolatedCopy() const;
351
352 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX) const;
353
354 UChar operator[](unsigned i) const {
355 SECURITY_DCHECK(i < m_length);
356 if (is8Bit())
357 return characters8()[i];
358 return characters16()[i];
359 }
360 UChar32 characterStartingAt(unsigned);
361
362 bool containsOnlyWhitespace();
363
364 int toIntStrict(bool* ok = 0, int base = 10);
365 unsigned toUIntStrict(bool* ok = 0, int base = 10);
366 int64_t toInt64Strict(bool* ok = 0, int base = 10);
367 uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
368
369 int toInt(bool* ok = 0); // ignores trailing garbage
370 unsigned toUInt(bool* ok = 0); // ignores trailing garbage
371 int64_t toInt64(bool* ok = 0); // ignores trailing garbage
372 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
373
374 // FIXME: Like the strict functions above, these give false for "ok" when
375 // there is trailing garbage. Like the non-strict functions above, these
376 // return the value when there is trailing garbage. It would be better if
377 // these were more consistent with the above functions instead.
378 double toDouble(bool* ok = 0);
379 float toFloat(bool* ok = 0);
380
381 PassRefPtr<StringImpl> lower();
382 PassRefPtr<StringImpl> lowerASCII();
383 PassRefPtr<StringImpl> upper();
384 PassRefPtr<StringImpl> upperASCII();
385 PassRefPtr<StringImpl> lower(const AtomicString& localeIdentifier);
386 PassRefPtr<StringImpl> upper(const AtomicString& localeIdentifier);
387
388 PassRefPtr<StringImpl> fill(UChar);
389 // FIXME: Do we need fill(char) or can we just do the right thing if UChar is
390 // ASCII?
391 PassRefPtr<StringImpl> foldCase();
392
393 PassRefPtr<StringImpl> truncate(unsigned length);
394
395 PassRefPtr<StringImpl> stripWhiteSpace();
396 PassRefPtr<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr);
397 PassRefPtr<StringImpl> simplifyWhiteSpace(
398 StripBehavior = StripExtraWhiteSpace);
399 PassRefPtr<StringImpl> simplifyWhiteSpace(
400 IsWhiteSpaceFunctionPtr,
401 StripBehavior = StripExtraWhiteSpace);
402
403 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
404 template <typename CharType>
405 ALWAYS_INLINE PassRefPtr<StringImpl> removeCharacters(
406 const CharType* characters,
407 CharacterMatchFunctionPtr);
408
409 // Remove characters between [start, start+lengthToRemove). The range is
410 // clamped to the size of the string. Does nothing if start >= length().
411 PassRefPtr<StringImpl> remove(unsigned start, unsigned lengthToRemove = 1);
412
413 // Find characters.
414 size_t find(LChar character, unsigned start = 0);
415 size_t find(char character, unsigned start = 0);
416 size_t find(UChar character, unsigned start = 0);
417 size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
418
419 // Find substrings.
420 size_t find(const StringView&, unsigned index = 0);
421 // Unicode aware case insensitive string matching. Non-ASCII characters might
422 // match to ASCII characters. This function is rarely used to implement web
423 // platform features.
424 size_t findIgnoringCase(const StringView&, unsigned index = 0);
425 size_t findIgnoringASCIICase(const StringView&, unsigned index = 0);
426
427 size_t reverseFind(UChar, unsigned index = UINT_MAX);
428 size_t reverseFind(const StringView&, unsigned index = UINT_MAX);
429
430 bool startsWith(UChar) const;
431 bool startsWith(const StringView&) const;
432 bool startsWithIgnoringCase(const StringView&) const;
433 bool startsWithIgnoringASCIICase(const StringView&) const;
434
435 bool endsWith(UChar) const;
436 bool endsWith(const StringView&) const;
437 bool endsWithIgnoringCase(const StringView&) const;
438 bool endsWithIgnoringASCIICase(const StringView&) const;
439
440 // Replace parts of the string.
441 PassRefPtr<StringImpl> replace(UChar pattern, UChar replacement);
442 PassRefPtr<StringImpl> replace(UChar pattern, const StringView& replacement);
443 PassRefPtr<StringImpl> replace(const StringView& pattern,
444 const StringView& replacement);
445 PassRefPtr<StringImpl> replace(unsigned index,
446 unsigned lengthToReplace,
447 const StringView& replacement);
448
449 PassRefPtr<StringImpl> upconvertedString();
450
451 // Copy characters from string starting at |start| up until |maxLength| or
452 // the end of the string is reached. Returns the actual number of characters
453 // copied.
454 unsigned copyTo(UChar* buffer, unsigned start, unsigned maxLength) const;
455
456 // Append characters from this string into a buffer. Expects the buffer to
457 // have the methods:
458 // append(const UChar*, unsigned length);
459 // append(const LChar*, unsigned length);
460 // StringBuilder and Vector conform to this protocol.
461 template <typename BufferType>
462 void appendTo(BufferType&,
463 unsigned start = 0,
464 unsigned length = UINT_MAX) const;
465
466 // Prepend characters from this string into a buffer. Expects the buffer to
467 // have the methods:
468 // prepend(const UChar*, unsigned length);
469 // prepend(const LChar*, unsigned length);
470 // Vector conforms to this protocol.
471 template <typename BufferType>
472 void prependTo(BufferType&,
473 unsigned start = 0,
474 unsigned length = UINT_MAX) const;
475
476 #if OS(MACOSX)
477 RetainPtr<CFStringRef> createCFString();
478 #endif
479 #ifdef __OBJC__
480 operator NSString*();
481 #endif
482
483 #ifdef STRING_STATS
484 ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; }
485 #endif
486 static const UChar latin1CaseFoldTable[256];
487
488 private:
489 template <typename CharType>
490 static size_t allocationSize(unsigned length) {
491 RELEASE_ASSERT(
492 length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) /
493 sizeof(CharType)));
494 return sizeof(StringImpl) + length * sizeof(CharType);
495 }
496
497 PassRefPtr<StringImpl> replace(UChar pattern,
498 const LChar* replacement,
499 unsigned replacementLength);
500 PassRefPtr<StringImpl> replace(UChar pattern,
501 const UChar* replacement,
502 unsigned replacementLength);
503
504 template <class UCharPredicate>
505 PassRefPtr<StringImpl> stripMatchedCharacters(UCharPredicate);
506 template <typename CharType, class UCharPredicate>
507 PassRefPtr<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate,
508 StripBehavior);
509 NEVER_INLINE unsigned hashSlowCase() const;
510
511 void destroyIfNotStatic() const;
512 void updateContainsOnlyASCII() const;
513
514 #if DCHECK_IS_ON()
515 std::string asciiForDebugging() const;
516 #endif
517
518 #ifdef STRING_STATS
519 static StringStats m_stringStats;
520 #endif
521
522 static unsigned m_highestStaticStringLength;
523
524 #if DCHECK_IS_ON()
525 void assertHashIsCorrect() {
526 DCHECK(hasHash());
527 DCHECK_EQ(existingHash(), StringHasher::computeHashAndMaskTop8Bits(
528 characters8(), length()));
529 }
530 #endif
531
532 private:
533 #if DCHECK_IS_ON()
534 mutable ThreadRestrictionVerifier m_verifier;
535 #endif
536 mutable unsigned m_refCount;
537 const unsigned m_length;
538 mutable unsigned m_hash : 24;
539 mutable unsigned m_containsOnlyASCII : 1;
540 mutable unsigned m_needsASCIICheck : 1;
541 unsigned m_isAtomic : 1;
542 const unsigned m_is8Bit : 1;
543 const unsigned m_isStatic : 1;
544 };
545
546 template <>
547 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const {
548 return characters8();
549 }
550
551 template <>
552 ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const {
553 return characters16();
554 }
555
556 WTF_EXPORT bool equal(const StringImpl*, const StringImpl*);
557 WTF_EXPORT bool equal(const StringImpl*, const LChar*);
558 inline bool equal(const StringImpl* a, const char* b) {
559 return equal(a, reinterpret_cast<const LChar*>(b));
560 }
561 WTF_EXPORT bool equal(const StringImpl*, const LChar*, unsigned);
562 WTF_EXPORT bool equal(const StringImpl*, const UChar*, unsigned);
563 inline bool equal(const StringImpl* a, const char* b, unsigned length) {
564 return equal(a, reinterpret_cast<const LChar*>(b), length);
565 }
566 inline bool equal(const LChar* a, StringImpl* b) {
567 return equal(b, a);
568 }
569 inline bool equal(const char* a, StringImpl* b) {
570 return equal(b, reinterpret_cast<const LChar*>(a));
571 }
572 WTF_EXPORT bool equalNonNull(const StringImpl* a, const StringImpl* b);
573
574 ALWAYS_INLINE bool StringImpl::containsOnlyASCII() const {
575 if (m_needsASCIICheck)
576 updateContainsOnlyASCII();
577 return m_containsOnlyASCII;
578 }
579
580 template <typename CharType>
581 ALWAYS_INLINE bool equal(const CharType* a,
582 const CharType* b,
583 unsigned length) {
584 return !memcmp(a, b, length * sizeof(CharType));
585 }
586
587 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) {
588 for (unsigned i = 0; i < length; ++i) {
589 if (a[i] != b[i])
590 return false;
591 }
592 return true;
593 }
594
595 ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) {
596 return equal(b, a, length);
597 }
598
599 // Unicode aware case insensitive string matching. Non-ASCII characters might
600 // match to ASCII characters. These functions are rarely used to implement web
601 // platform features.
602 WTF_EXPORT bool equalIgnoringCase(const LChar*, const LChar*, unsigned length);
603 WTF_EXPORT bool equalIgnoringCase(const UChar*, const LChar*, unsigned length);
604 inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) {
605 return equalIgnoringCase(b, a, length);
606 }
607 WTF_EXPORT bool equalIgnoringCase(const UChar*, const UChar*, unsigned length);
608
609 WTF_EXPORT bool equalIgnoringNullity(StringImpl*, StringImpl*);
610
// Returns true when the first |length| characters of |a| and |b| are equal
// after each character has been passed through toASCIILower(). The two
// buffers may use different character types.
template <typename CharacterTypeA, typename CharacterTypeB>
inline bool equalIgnoringASCIICase(const CharacterTypeA* a,
                                   const CharacterTypeB* b,
                                   unsigned length) {
  unsigned i = 0;
  while (i < length && toASCIILower(a[i]) == toASCIILower(b[i]))
    ++i;
  return i == length;
}
621
622 WTF_EXPORT int codePointCompareIgnoringASCIICase(const StringImpl*,
623 const LChar*);
624
625 inline size_t find(const LChar* characters,
626 unsigned length,
627 LChar matchCharacter,
628 unsigned index = 0) {
629 // Some clients rely on being able to pass index >= length.
630 if (index >= length)
631 return kNotFound;
632 const LChar* found = static_cast<const LChar*>(
633 memchr(characters + index, matchCharacter, length - index));
634 return found ? found - characters : kNotFound;
635 }
636
637 inline size_t find(const UChar* characters,
638 unsigned length,
639 UChar matchCharacter,
640 unsigned index = 0) {
641 while (index < length) {
642 if (characters[index] == matchCharacter)
643 return index;
644 ++index;
645 }
646 return kNotFound;
647 }
648
649 ALWAYS_INLINE size_t find(const UChar* characters,
650 unsigned length,
651 LChar matchCharacter,
652 unsigned index = 0) {
653 return find(characters, length, static_cast<UChar>(matchCharacter), index);
654 }
655
656 inline size_t find(const LChar* characters,
657 unsigned length,
658 UChar matchCharacter,
659 unsigned index = 0) {
660 if (matchCharacter & ~0xFF)
661 return kNotFound;
662 return find(characters, length, static_cast<LChar>(matchCharacter), index);
663 }
664
665 template <typename CharacterType>
666 inline size_t find(const CharacterType* characters,
667 unsigned length,
668 char matchCharacter,
669 unsigned index = 0) {
670 return find(characters, length, static_cast<LChar>(matchCharacter), index);
671 }
672
673 inline size_t find(const LChar* characters,
674 unsigned length,
675 CharacterMatchFunctionPtr matchFunction,
676 unsigned index = 0) {
677 while (index < length) {
678 if (matchFunction(characters[index]))
679 return index;
680 ++index;
681 }
682 return kNotFound;
683 }
684
685 inline size_t find(const UChar* characters,
686 unsigned length,
687 CharacterMatchFunctionPtr matchFunction,
688 unsigned index = 0) {
689 while (index < length) {
690 if (matchFunction(characters[index]))
691 return index;
692 ++index;
693 }
694 return kNotFound;
695 }
696
697 template <typename CharacterType>
698 inline size_t reverseFind(const CharacterType* characters,
699 unsigned length,
700 CharacterType matchCharacter,
701 unsigned index = UINT_MAX) {
702 if (!length)
703 return kNotFound;
704 if (index >= length)
705 index = length - 1;
706 while (characters[index] != matchCharacter) {
707 if (!index--)
708 return kNotFound;
709 }
710 return index;
711 }
712
713 ALWAYS_INLINE size_t reverseFind(const UChar* characters,
714 unsigned length,
715 LChar matchCharacter,
716 unsigned index = UINT_MAX) {
717 return reverseFind(characters, length, static_cast<UChar>(matchCharacter),
718 index);
719 }
720
721 inline size_t reverseFind(const LChar* characters,
722 unsigned length,
723 UChar matchCharacter,
724 unsigned index = UINT_MAX) {
725 if (matchCharacter & ~0xFF)
726 return kNotFound;
727 return reverseFind(characters, length, static_cast<LChar>(matchCharacter),
728 index);
729 }
730
731 inline size_t StringImpl::find(LChar character, unsigned start) {
732 if (is8Bit())
733 return WTF::find(characters8(), m_length, character, start);
734 return WTF::find(characters16(), m_length, character, start);
735 }
736
737 ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start) {
738 return find(static_cast<LChar>(character), start);
739 }
740
741 inline size_t StringImpl::find(UChar character, unsigned start) {
742 if (is8Bit())
743 return WTF::find(characters8(), m_length, character, start);
744 return WTF::find(characters16(), m_length, character, start);
745 }
746
747 inline unsigned lengthOfNullTerminatedString(const UChar* string) {
748 size_t length = 0;
749 while (string[length] != UChar(0))
750 ++length;
751 RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max());
752 return static_cast<unsigned>(length);
753 }
754
755 template <size_t inlineCapacity>
756 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a,
757 StringImpl* b) {
758 if (!b)
759 return !a.size();
760 if (a.size() != b->length())
761 return false;
762 if (b->is8Bit())
763 return equal(a.data(), b->characters8(), b->length());
764 return equal(a.data(), b->characters16(), b->length());
765 }
766
// Three-way comparison of two character buffers of lengths |l1| and |l2|.
// The result is decided by the first differing character within the common
// prefix; when the common prefix is identical, the shorter buffer orders
// first. Returns -1, 0 or 1.
template <typename CharacterType1, typename CharacterType2>
static inline int codePointCompare(unsigned l1,
                                   unsigned l2,
                                   const CharacterType1* c1,
                                   const CharacterType2* c2) {
  const unsigned commonLength = l2 < l1 ? l2 : l1;
  for (unsigned i = 0; i < commonLength; ++i) {
    if (c1[i] != c2[i])
      return c1[i] > c2[i] ? 1 : -1;
  }

  if (l1 == l2)
    return 0;
  return l1 > l2 ? 1 : -1;
}
788
789 static inline int codePointCompare8(const StringImpl* string1,
790 const StringImpl* string2) {
791 return codePointCompare(string1->length(), string2->length(),
792 string1->characters8(), string2->characters8());
793 }
794
795 static inline int codePointCompare16(const StringImpl* string1,
796 const StringImpl* string2) {
797 return codePointCompare(string1->length(), string2->length(),
798 string1->characters16(), string2->characters16());
799 }
800
801 static inline int codePointCompare8To16(const StringImpl* string1,
802 const StringImpl* string2) {
803 return codePointCompare(string1->length(), string2->length(),
804 string1->characters8(), string2->characters16());
805 }
806
807 static inline int codePointCompare(const StringImpl* string1,
808 const StringImpl* string2) {
809 if (!string1)
810 return (string2 && string2->length()) ? -1 : 0;
811
812 if (!string2)
813 return string1->length() ? 1 : 0;
814
815 bool string1Is8Bit = string1->is8Bit();
816 bool string2Is8Bit = string2->is8Bit();
817 if (string1Is8Bit) {
818 if (string2Is8Bit)
819 return codePointCompare8(string1, string2);
820 return codePointCompare8To16(string1, string2);
821 }
822 if (string2Is8Bit)
823 return -codePointCompare8To16(string2, string1);
824 return codePointCompare16(string1, string2);
825 }
826
827 static inline bool isSpaceOrNewline(UChar c) {
828 // Use isASCIISpace() for basic Latin-1.
829 // This will include newlines, which aren't included in Unicode DirWS.
830 return c <= 0x7F
831 ? WTF::isASCIISpace(c)
832 : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
833 }
834
835 inline PassRefPtr<StringImpl> StringImpl::isolatedCopy() const {
836 if (is8Bit())
837 return create(characters8(), m_length);
838 return create(characters16(), m_length);
839 }
840
841 template <typename BufferType>
842 inline void StringImpl::appendTo(BufferType& result,
843 unsigned start,
844 unsigned length) const {
845 unsigned numberOfCharactersToCopy = std::min(length, m_length - start);
846 if (!numberOfCharactersToCopy)
847 return;
848 if (is8Bit())
849 result.append(characters8() + start, numberOfCharactersToCopy);
850 else
851 result.append(characters16() + start, numberOfCharactersToCopy);
852 }
853
854 template <typename BufferType>
855 inline void StringImpl::prependTo(BufferType& result,
856 unsigned start,
857 unsigned length) const {
858 unsigned numberOfCharactersToCopy = std::min(length, m_length - start);
859 if (!numberOfCharactersToCopy)
860 return;
861 if (is8Bit())
862 result.prepend(characters8() + start, numberOfCharactersToCopy);
863 else
864 result.prepend(characters16() + start, numberOfCharactersToCopy);
865 }
866
867 // TODO(rob.buis) possibly find a better place for this method.
868 // Turns a UChar32 to uppercase based on localeIdentifier.
869 WTF_EXPORT UChar32 toUpper(UChar32, const AtomicString& localeIdentifier);
870
871 struct StringHash;
872
873 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
874 template <typename T>
875 struct DefaultHash;
876 template <>
877 struct DefaultHash<StringImpl*> {
878 typedef StringHash Hash;
879 };
880 template <>
881 struct DefaultHash<RefPtr<StringImpl>> {
882 typedef StringHash Hash;
883 };
884
885 } // namespace WTF
886
887 using WTF::StringImpl;
888 using WTF::TextCaseASCIIInsensitive;
889 using WTF::TextCaseUnicodeInsensitive;
890 using WTF::TextCaseSensitive;
891 using WTF::TextCaseSensitivity;
892 using WTF::equal;
893 using WTF::equalNonNull;
894 using WTF::lengthOfNullTerminatedString;
895 using WTF::reverseFind;
896
897 #endif
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/text/StringHash.h ('k') | third_party/WebKit/Source/wtf/text/StringImpl.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698