OLD | NEW |
1 /* | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | 2 // Use of this source code is governed by a BSD-style license that can be |
3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights | 3 // found in the LICENSE file. |
4 * reserved. | |
5 * Copyright (C) 2009 Google Inc. All rights reserved. | |
6 * | |
7 * This library is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Library General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2 of the License, or (at your option) any later version. | |
11 * | |
12 * This library is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Library General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Library General Public License | |
18 * along with this library; see the file COPYING.LIB. If not, write to | |
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
20 * Boston, MA 02110-1301, USA. | |
21 * | |
22 */ | |
23 | 4 |
24 #ifndef StringImpl_h | 5 #include "platform/wtf/text/StringImpl.h" |
25 #define StringImpl_h | |
26 | 6 |
27 #include "wtf/ASCIICType.h" | 7 // The contents of this header were moved to platform/wtf as part of |
28 #include "wtf/Forward.h" | 8 // WTF migration project. See the following post for details: |
29 #include "wtf/HashMap.h" | 9 // https://groups.google.com/a/chromium.org/d/msg/blink-dev/tLdAZCTlcAA/bYXVT8gYCAAJ |
30 #include "wtf/StringHasher.h" | |
31 #include "wtf/Vector.h" | |
32 #include "wtf/WTFExport.h" | |
33 #include "wtf/text/ASCIIFastPath.h" | |
34 #include "wtf/text/Unicode.h" | |
35 #include <limits.h> | |
36 #include <string.h> | |
37 | |
38 #if DCHECK_IS_ON() | |
39 #include "wtf/ThreadRestrictionVerifier.h" | |
40 #endif | |
41 | |
42 #if OS(MACOSX) | |
43 typedef const struct __CFString* CFStringRef; | |
44 #endif | |
45 | |
46 #ifdef __OBJC__ | |
47 @class NSString; | |
48 #endif | |
49 | |
50 namespace WTF { | |
51 | |
52 struct AlreadyHashed; | |
53 template <typename> | |
54 class RetainPtr; | |
55 | |
56 enum TextCaseSensitivity { | |
57 TextCaseSensitive, | |
58 TextCaseASCIIInsensitive, | |
59 | |
60 // Unicode aware case insensitive matching. Non-ASCII characters might match | |
61 // to ASCII characters. This flag is rarely used to implement web platform | |
62 // features. | |
63 TextCaseUnicodeInsensitive | |
64 }; | |
65 | |
66 enum StripBehavior { StripExtraWhiteSpace, DoNotStripWhiteSpace }; | |
67 | |
68 typedef bool (*CharacterMatchFunctionPtr)(UChar); | |
69 typedef bool (*IsWhiteSpaceFunctionPtr)(UChar); | |
70 typedef HashMap<unsigned, StringImpl*, AlreadyHashed> StaticStringsTable; | |
71 | |
// Define STRING_STATS to turn on run time statistics of string sizes and memory
// usage
#undef STRING_STATS

#ifdef STRING_STATS
// Counters for the number of strings and the amount of character data, split
// by 8-bit and 16-bit storage.
struct StringStats {
  // Records one more 8-bit string of |length| characters.
  inline void add8BitString(unsigned length) {
    ++m_totalNumberStrings;
    ++m_number8BitStrings;
    m_total8BitData += length;
  }

  // Records one more 16-bit string of |length| characters.
  inline void add16BitString(unsigned length) {
    ++m_totalNumberStrings;
    ++m_number16BitStrings;
    m_total16BitData += length;
  }

  void removeString(StringImpl*);
  void printStats();

  static const unsigned s_printStringStatsFrequency = 5000;
  static unsigned s_stringRemovesTillPrintStats;

  unsigned m_totalNumberStrings;
  unsigned m_number8BitStrings;
  unsigned m_number16BitStrings;
  unsigned long long m_total8BitData;
  unsigned long long m_total16BitData;
};

void addStringForStats(StringImpl*);
void removeStringForStats(StringImpl*);

// Each macro expands to exactly one statement (do { ... } while (0)), so both
// calls stay together when the macro is the body of an unbraced if/else or
// loop. The macros reference |this| and therefore only work inside StringImpl
// member functions.
// NOTE(review): STRING_STATS_REMOVE_STRING passes |this|, not |string|, to
// removeStringForStats(); kept as in the original.
#define STRING_STATS_ADD_8BIT_STRING(length)         \
  do {                                               \
    StringImpl::stringStats().add8BitString(length); \
    addStringForStats(this);                         \
  } while (0)
#define STRING_STATS_ADD_16BIT_STRING(length)         \
  do {                                                \
    StringImpl::stringStats().add16BitString(length); \
    addStringForStats(this);                          \
  } while (0)
#define STRING_STATS_REMOVE_STRING(string)          \
  do {                                              \
    StringImpl::stringStats().removeString(string); \
    removeStringForStats(this);                     \
  } while (0)
#else
// Statistics disabled: the macros compile to nothing.
#define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_16BIT_STRING(length) ((void)0)
#define STRING_STATS_REMOVE_STRING(string) ((void)0)
#endif
120 | |
121 // You can find documentation about this class in this doc: | |
122 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl
14/edit?usp=sharing | |
123 class WTF_EXPORT StringImpl { | |
124 WTF_MAKE_NONCOPYABLE(StringImpl); | |
125 | |
126 private: | |
127 // StringImpls are allocated out of the WTF buffer partition. | |
128 void* operator new(size_t); | |
129 void* operator new(size_t, void* ptr) { return ptr; } | |
130 void operator delete(void*); | |
131 | |
132 // Used to construct static strings, which have an special refCount that can | |
133 // never hit zero. This means that the static string will never be | |
134 // destroyed, which is important because static strings will be shared | |
135 // across threads & ref-counted in a non-threadsafe manner. | |
136 enum ConstructEmptyStringTag { ConstructEmptyString }; | |
137 explicit StringImpl(ConstructEmptyStringTag) | |
138 : m_refCount(1), | |
139 m_length(0), | |
140 m_hash(0), | |
141 m_containsOnlyASCII(true), | |
142 m_needsASCIICheck(false), | |
143 m_isAtomic(false), | |
144 m_is8Bit(true), | |
145 m_isStatic(true) { | |
146 // Ensure that the hash is computed so that AtomicStringHash can call | |
147 // existingHash() with impunity. The empty string is special because it | |
148 // is never entered into AtomicString's HashKey, but still needs to | |
149 // compare correctly. | |
150 STRING_STATS_ADD_8BIT_STRING(m_length); | |
151 hash(); | |
152 } | |
153 | |
154 enum ConstructEmptyString16BitTag { ConstructEmptyString16Bit }; | |
155 explicit StringImpl(ConstructEmptyString16BitTag) | |
156 : m_refCount(1), | |
157 m_length(0), | |
158 m_hash(0), | |
159 m_containsOnlyASCII(true), | |
160 m_needsASCIICheck(false), | |
161 m_isAtomic(false), | |
162 m_is8Bit(false), | |
163 m_isStatic(true) { | |
164 STRING_STATS_ADD_16BIT_STRING(m_length); | |
165 hash(); | |
166 } | |
167 | |
168 // FIXME: there has to be a less hacky way to do this. | |
169 enum Force8Bit { Force8BitConstructor }; | |
170 StringImpl(unsigned length, Force8Bit) | |
171 : m_refCount(1), | |
172 m_length(length), | |
173 m_hash(0), | |
174 m_containsOnlyASCII(!length), | |
175 m_needsASCIICheck(static_cast<bool>(length)), | |
176 m_isAtomic(false), | |
177 m_is8Bit(true), | |
178 m_isStatic(false) { | |
179 DCHECK(m_length); | |
180 STRING_STATS_ADD_8BIT_STRING(m_length); | |
181 } | |
182 | |
183 StringImpl(unsigned length) | |
184 : m_refCount(1), | |
185 m_length(length), | |
186 m_hash(0), | |
187 m_containsOnlyASCII(!length), | |
188 m_needsASCIICheck(static_cast<bool>(length)), | |
189 m_isAtomic(false), | |
190 m_is8Bit(false), | |
191 m_isStatic(false) { | |
192 DCHECK(m_length); | |
193 STRING_STATS_ADD_16BIT_STRING(m_length); | |
194 } | |
195 | |
196 enum StaticStringTag { StaticString }; | |
197 StringImpl(unsigned length, unsigned hash, StaticStringTag) | |
198 : m_refCount(1), | |
199 m_length(length), | |
200 m_hash(hash), | |
201 m_containsOnlyASCII(!length), | |
202 m_needsASCIICheck(static_cast<bool>(length)), | |
203 m_isAtomic(false), | |
204 m_is8Bit(true), | |
205 m_isStatic(true) {} | |
206 | |
207 public: | |
208 static StringImpl* empty; | |
209 static StringImpl* empty16Bit; | |
210 | |
211 ~StringImpl(); | |
212 | |
213 static void initStatics(); | |
214 | |
215 static StringImpl* createStatic(const char* string, | |
216 unsigned length, | |
217 unsigned hash); | |
218 static void reserveStaticStringsCapacityForSize(unsigned size); | |
219 static void freezeStaticStrings(); | |
220 static const StaticStringsTable& allStaticStrings(); | |
221 static unsigned highestStaticStringLength() { | |
222 return m_highestStaticStringLength; | |
223 } | |
224 | |
225 static PassRefPtr<StringImpl> create(const UChar*, unsigned length); | |
226 static PassRefPtr<StringImpl> create(const LChar*, unsigned length); | |
227 static PassRefPtr<StringImpl> create8BitIfPossible(const UChar*, | |
228 unsigned length); | |
229 template <size_t inlineCapacity> | |
230 static PassRefPtr<StringImpl> create8BitIfPossible( | |
231 const Vector<UChar, inlineCapacity>& vector) { | |
232 return create8BitIfPossible(vector.data(), vector.size()); | |
233 } | |
234 | |
235 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s, | |
236 unsigned length) { | |
237 return create(reinterpret_cast<const LChar*>(s), length); | |
238 } | |
239 static PassRefPtr<StringImpl> create(const LChar*); | |
240 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s) { | |
241 return create(reinterpret_cast<const LChar*>(s)); | |
242 } | |
243 | |
244 static PassRefPtr<StringImpl> createUninitialized(unsigned length, | |
245 LChar*& data); | |
246 static PassRefPtr<StringImpl> createUninitialized(unsigned length, | |
247 UChar*& data); | |
248 | |
249 unsigned length() const { return m_length; } | |
250 bool is8Bit() const { return m_is8Bit; } | |
251 | |
252 ALWAYS_INLINE const LChar* characters8() const { | |
253 DCHECK(is8Bit()); | |
254 return reinterpret_cast<const LChar*>(this + 1); | |
255 } | |
256 ALWAYS_INLINE const UChar* characters16() const { | |
257 DCHECK(!is8Bit()); | |
258 return reinterpret_cast<const UChar*>(this + 1); | |
259 } | |
260 ALWAYS_INLINE const void* bytes() const { | |
261 return reinterpret_cast<const void*>(this + 1); | |
262 } | |
263 | |
264 template <typename CharType> | |
265 ALWAYS_INLINE const CharType* getCharacters() const; | |
266 | |
267 size_t charactersSizeInBytes() const { | |
268 return length() * (is8Bit() ? sizeof(LChar) : sizeof(UChar)); | |
269 } | |
270 | |
271 bool isAtomic() const { return m_isAtomic; } | |
272 void setIsAtomic(bool isAtomic) { m_isAtomic = isAtomic; } | |
273 | |
274 bool isStatic() const { return m_isStatic; } | |
275 | |
276 bool containsOnlyASCII() const; | |
277 | |
278 bool isSafeToSendToAnotherThread() const; | |
279 | |
280 // The high bits of 'hash' are always empty, but we prefer to store our | |
281 // flags in the low bits because it makes them slightly more efficient to | |
282 // access. So, we shift left and right when setting and getting our hash | |
283 // code. | |
284 void setHash(unsigned hash) const { | |
285 DCHECK(!hasHash()); | |
286 // Multiple clients assume that StringHasher is the canonical string | |
287 // hash function. | |
288 DCHECK(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits( | |
289 characters8(), m_length) | |
290 : StringHasher::computeHashAndMaskTop8Bits( | |
291 characters16(), m_length))); | |
292 m_hash = hash; | |
293 DCHECK(hash); // Verify that 0 is a valid sentinel hash value. | |
294 } | |
295 | |
296 bool hasHash() const { return m_hash != 0; } | |
297 | |
298 unsigned existingHash() const { | |
299 DCHECK(hasHash()); | |
300 return m_hash; | |
301 } | |
302 | |
303 unsigned hash() const { | |
304 if (hasHash()) | |
305 return existingHash(); | |
306 return hashSlowCase(); | |
307 } | |
308 | |
309 ALWAYS_INLINE bool hasOneRef() const { | |
310 #if DCHECK_IS_ON() | |
311 DCHECK(isStatic() || m_verifier.isSafeToUse()) << asciiForDebugging(); | |
312 #endif | |
313 return m_refCount == 1; | |
314 } | |
315 | |
316 ALWAYS_INLINE void ref() const { | |
317 #if DCHECK_IS_ON() | |
318 DCHECK(isStatic() || m_verifier.onRef(m_refCount)) << asciiForDebugging(); | |
319 #endif | |
320 ++m_refCount; | |
321 } | |
322 | |
323 ALWAYS_INLINE void deref() const { | |
324 #if DCHECK_IS_ON() | |
325 DCHECK(isStatic() || m_verifier.onDeref(m_refCount)) | |
326 << asciiForDebugging() << " " << currentThread(); | |
327 #endif | |
328 if (!--m_refCount) | |
329 destroyIfNotStatic(); | |
330 } | |
331 | |
332 // FIXME: Does this really belong in StringImpl? | |
333 template <typename T> | |
334 static void copyChars(T* destination, | |
335 const T* source, | |
336 unsigned numCharacters) { | |
337 memcpy(destination, source, numCharacters * sizeof(T)); | |
338 } | |
339 | |
340 ALWAYS_INLINE static void copyChars(UChar* destination, | |
341 const LChar* source, | |
342 unsigned numCharacters) { | |
343 for (unsigned i = 0; i < numCharacters; ++i) | |
344 destination[i] = source[i]; | |
345 } | |
346 | |
347 // Some string features, like refcounting and the atomicity flag, are not | |
348 // thread-safe. We achieve thread safety by isolation, giving each thread | |
349 // its own copy of the string. | |
350 PassRefPtr<StringImpl> isolatedCopy() const; | |
351 | |
352 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX) const; | |
353 | |
354 UChar operator[](unsigned i) const { | |
355 SECURITY_DCHECK(i < m_length); | |
356 if (is8Bit()) | |
357 return characters8()[i]; | |
358 return characters16()[i]; | |
359 } | |
360 UChar32 characterStartingAt(unsigned); | |
361 | |
362 bool containsOnlyWhitespace(); | |
363 | |
364 int toIntStrict(bool* ok = 0, int base = 10); | |
365 unsigned toUIntStrict(bool* ok = 0, int base = 10); | |
366 int64_t toInt64Strict(bool* ok = 0, int base = 10); | |
367 uint64_t toUInt64Strict(bool* ok = 0, int base = 10); | |
368 | |
369 int toInt(bool* ok = 0); // ignores trailing garbage | |
370 unsigned toUInt(bool* ok = 0); // ignores trailing garbage | |
371 int64_t toInt64(bool* ok = 0); // ignores trailing garbage | |
372 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage | |
373 | |
374 // FIXME: Like the strict functions above, these give false for "ok" when | |
375 // there is trailing garbage. Like the non-strict functions above, these | |
376 // return the value when there is trailing garbage. It would be better if | |
377 // these were more consistent with the above functions instead. | |
378 double toDouble(bool* ok = 0); | |
379 float toFloat(bool* ok = 0); | |
380 | |
381 PassRefPtr<StringImpl> lower(); | |
382 PassRefPtr<StringImpl> lowerASCII(); | |
383 PassRefPtr<StringImpl> upper(); | |
384 PassRefPtr<StringImpl> upperASCII(); | |
385 PassRefPtr<StringImpl> lower(const AtomicString& localeIdentifier); | |
386 PassRefPtr<StringImpl> upper(const AtomicString& localeIdentifier); | |
387 | |
388 PassRefPtr<StringImpl> fill(UChar); | |
389 // FIXME: Do we need fill(char) or can we just do the right thing if UChar is | |
390 // ASCII? | |
391 PassRefPtr<StringImpl> foldCase(); | |
392 | |
393 PassRefPtr<StringImpl> truncate(unsigned length); | |
394 | |
395 PassRefPtr<StringImpl> stripWhiteSpace(); | |
396 PassRefPtr<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr); | |
397 PassRefPtr<StringImpl> simplifyWhiteSpace( | |
398 StripBehavior = StripExtraWhiteSpace); | |
399 PassRefPtr<StringImpl> simplifyWhiteSpace( | |
400 IsWhiteSpaceFunctionPtr, | |
401 StripBehavior = StripExtraWhiteSpace); | |
402 | |
403 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); | |
404 template <typename CharType> | |
405 ALWAYS_INLINE PassRefPtr<StringImpl> removeCharacters( | |
406 const CharType* characters, | |
407 CharacterMatchFunctionPtr); | |
408 | |
409 // Remove characters between [start, start+lengthToRemove). The range is | |
410 // clamped to the size of the string. Does nothing if start >= length(). | |
411 PassRefPtr<StringImpl> remove(unsigned start, unsigned lengthToRemove = 1); | |
412 | |
413 // Find characters. | |
414 size_t find(LChar character, unsigned start = 0); | |
415 size_t find(char character, unsigned start = 0); | |
416 size_t find(UChar character, unsigned start = 0); | |
417 size_t find(CharacterMatchFunctionPtr, unsigned index = 0); | |
418 | |
419 // Find substrings. | |
420 size_t find(const StringView&, unsigned index = 0); | |
421 // Unicode aware case insensitive string matching. Non-ASCII characters might | |
422 // match to ASCII characters. This function is rarely used to implement web | |
423 // platform features. | |
424 size_t findIgnoringCase(const StringView&, unsigned index = 0); | |
425 size_t findIgnoringASCIICase(const StringView&, unsigned index = 0); | |
426 | |
427 size_t reverseFind(UChar, unsigned index = UINT_MAX); | |
428 size_t reverseFind(const StringView&, unsigned index = UINT_MAX); | |
429 | |
430 bool startsWith(UChar) const; | |
431 bool startsWith(const StringView&) const; | |
432 bool startsWithIgnoringCase(const StringView&) const; | |
433 bool startsWithIgnoringASCIICase(const StringView&) const; | |
434 | |
435 bool endsWith(UChar) const; | |
436 bool endsWith(const StringView&) const; | |
437 bool endsWithIgnoringCase(const StringView&) const; | |
438 bool endsWithIgnoringASCIICase(const StringView&) const; | |
439 | |
440 // Replace parts of the string. | |
441 PassRefPtr<StringImpl> replace(UChar pattern, UChar replacement); | |
442 PassRefPtr<StringImpl> replace(UChar pattern, const StringView& replacement); | |
443 PassRefPtr<StringImpl> replace(const StringView& pattern, | |
444 const StringView& replacement); | |
445 PassRefPtr<StringImpl> replace(unsigned index, | |
446 unsigned lengthToReplace, | |
447 const StringView& replacement); | |
448 | |
449 PassRefPtr<StringImpl> upconvertedString(); | |
450 | |
451 // Copy characters from string starting at |start| up until |maxLength| or | |
452 // the end of the string is reached. Returns the actual number of characters | |
453 // copied. | |
454 unsigned copyTo(UChar* buffer, unsigned start, unsigned maxLength) const; | |
455 | |
456 // Append characters from this string into a buffer. Expects the buffer to | |
457 // have the methods: | |
458 // append(const UChar*, unsigned length); | |
459 // append(const LChar*, unsigned length); | |
460 // StringBuilder and Vector conform to this protocol. | |
461 template <typename BufferType> | |
462 void appendTo(BufferType&, | |
463 unsigned start = 0, | |
464 unsigned length = UINT_MAX) const; | |
465 | |
466 // Prepend characters from this string into a buffer. Expects the buffer to | |
467 // have the methods: | |
468 // prepend(const UChar*, unsigned length); | |
469 // prepend(const LChar*, unsigned length); | |
470 // Vector conforms to this protocol. | |
471 template <typename BufferType> | |
472 void prependTo(BufferType&, | |
473 unsigned start = 0, | |
474 unsigned length = UINT_MAX) const; | |
475 | |
476 #if OS(MACOSX) | |
477 RetainPtr<CFStringRef> createCFString(); | |
478 #endif | |
479 #ifdef __OBJC__ | |
480 operator NSString*(); | |
481 #endif | |
482 | |
483 #ifdef STRING_STATS | |
484 ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; } | |
485 #endif | |
486 static const UChar latin1CaseFoldTable[256]; | |
487 | |
488 private: | |
489 template <typename CharType> | |
490 static size_t allocationSize(unsigned length) { | |
491 RELEASE_ASSERT( | |
492 length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / | |
493 sizeof(CharType))); | |
494 return sizeof(StringImpl) + length * sizeof(CharType); | |
495 } | |
496 | |
497 PassRefPtr<StringImpl> replace(UChar pattern, | |
498 const LChar* replacement, | |
499 unsigned replacementLength); | |
500 PassRefPtr<StringImpl> replace(UChar pattern, | |
501 const UChar* replacement, | |
502 unsigned replacementLength); | |
503 | |
504 template <class UCharPredicate> | |
505 PassRefPtr<StringImpl> stripMatchedCharacters(UCharPredicate); | |
506 template <typename CharType, class UCharPredicate> | |
507 PassRefPtr<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate, | |
508 StripBehavior); | |
509 NEVER_INLINE unsigned hashSlowCase() const; | |
510 | |
511 void destroyIfNotStatic() const; | |
512 void updateContainsOnlyASCII() const; | |
513 | |
514 #if DCHECK_IS_ON() | |
515 std::string asciiForDebugging() const; | |
516 #endif | |
517 | |
518 #ifdef STRING_STATS | |
519 static StringStats m_stringStats; | |
520 #endif | |
521 | |
522 static unsigned m_highestStaticStringLength; | |
523 | |
524 #if DCHECK_IS_ON() | |
525 void assertHashIsCorrect() { | |
526 DCHECK(hasHash()); | |
527 DCHECK_EQ(existingHash(), StringHasher::computeHashAndMaskTop8Bits( | |
528 characters8(), length())); | |
529 } | |
530 #endif | |
531 | |
532 private: | |
533 #if DCHECK_IS_ON() | |
534 mutable ThreadRestrictionVerifier m_verifier; | |
535 #endif | |
536 mutable unsigned m_refCount; | |
537 const unsigned m_length; | |
538 mutable unsigned m_hash : 24; | |
539 mutable unsigned m_containsOnlyASCII : 1; | |
540 mutable unsigned m_needsASCIICheck : 1; | |
541 unsigned m_isAtomic : 1; | |
542 const unsigned m_is8Bit : 1; | |
543 const unsigned m_isStatic : 1; | |
544 }; | |
545 | |
546 template <> | |
547 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { | |
548 return characters8(); | |
549 } | |
550 | |
551 template <> | |
552 ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { | |
553 return characters16(); | |
554 } | |
555 | |
556 WTF_EXPORT bool equal(const StringImpl*, const StringImpl*); | |
557 WTF_EXPORT bool equal(const StringImpl*, const LChar*); | |
558 inline bool equal(const StringImpl* a, const char* b) { | |
559 return equal(a, reinterpret_cast<const LChar*>(b)); | |
560 } | |
561 WTF_EXPORT bool equal(const StringImpl*, const LChar*, unsigned); | |
562 WTF_EXPORT bool equal(const StringImpl*, const UChar*, unsigned); | |
563 inline bool equal(const StringImpl* a, const char* b, unsigned length) { | |
564 return equal(a, reinterpret_cast<const LChar*>(b), length); | |
565 } | |
566 inline bool equal(const LChar* a, StringImpl* b) { | |
567 return equal(b, a); | |
568 } | |
569 inline bool equal(const char* a, StringImpl* b) { | |
570 return equal(b, reinterpret_cast<const LChar*>(a)); | |
571 } | |
572 WTF_EXPORT bool equalNonNull(const StringImpl* a, const StringImpl* b); | |
573 | |
574 ALWAYS_INLINE bool StringImpl::containsOnlyASCII() const { | |
575 if (m_needsASCIICheck) | |
576 updateContainsOnlyASCII(); | |
577 return m_containsOnlyASCII; | |
578 } | |
579 | |
580 template <typename CharType> | |
581 ALWAYS_INLINE bool equal(const CharType* a, | |
582 const CharType* b, | |
583 unsigned length) { | |
584 return !memcmp(a, b, length * sizeof(CharType)); | |
585 } | |
586 | |
587 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) { | |
588 for (unsigned i = 0; i < length; ++i) { | |
589 if (a[i] != b[i]) | |
590 return false; | |
591 } | |
592 return true; | |
593 } | |
594 | |
595 ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { | |
596 return equal(b, a, length); | |
597 } | |
598 | |
599 // Unicode aware case insensitive string matching. Non-ASCII characters might | |
600 // match to ASCII characters. These functions are rarely used to implement web | |
601 // platform features. | |
602 WTF_EXPORT bool equalIgnoringCase(const LChar*, const LChar*, unsigned length); | |
603 WTF_EXPORT bool equalIgnoringCase(const UChar*, const LChar*, unsigned length); | |
604 inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) { | |
605 return equalIgnoringCase(b, a, length); | |
606 } | |
607 WTF_EXPORT bool equalIgnoringCase(const UChar*, const UChar*, unsigned length); | |
608 | |
609 WTF_EXPORT bool equalIgnoringNullity(StringImpl*, StringImpl*); | |
610 | |
// Returns true when the first |length| characters of |a| and |b| are equal
// after each character is passed through toASCIILower(). The two buffers may
// have different character types.
template <typename CharacterTypeA, typename CharacterTypeB>
inline bool equalIgnoringASCIICase(const CharacterTypeA* a,
                                   const CharacterTypeB* b,
                                   unsigned length) {
  for (unsigned pos = 0; pos != length; ++pos) {
    const bool sameWhenLowered = toASCIILower(a[pos]) == toASCIILower(b[pos]);
    if (!sameWhenLowered)
      return false;
  }
  return true;
}
621 | |
622 WTF_EXPORT int codePointCompareIgnoringASCIICase(const StringImpl*, | |
623 const LChar*); | |
624 | |
625 inline size_t find(const LChar* characters, | |
626 unsigned length, | |
627 LChar matchCharacter, | |
628 unsigned index = 0) { | |
629 // Some clients rely on being able to pass index >= length. | |
630 if (index >= length) | |
631 return kNotFound; | |
632 const LChar* found = static_cast<const LChar*>( | |
633 memchr(characters + index, matchCharacter, length - index)); | |
634 return found ? found - characters : kNotFound; | |
635 } | |
636 | |
637 inline size_t find(const UChar* characters, | |
638 unsigned length, | |
639 UChar matchCharacter, | |
640 unsigned index = 0) { | |
641 while (index < length) { | |
642 if (characters[index] == matchCharacter) | |
643 return index; | |
644 ++index; | |
645 } | |
646 return kNotFound; | |
647 } | |
648 | |
649 ALWAYS_INLINE size_t find(const UChar* characters, | |
650 unsigned length, | |
651 LChar matchCharacter, | |
652 unsigned index = 0) { | |
653 return find(characters, length, static_cast<UChar>(matchCharacter), index); | |
654 } | |
655 | |
656 inline size_t find(const LChar* characters, | |
657 unsigned length, | |
658 UChar matchCharacter, | |
659 unsigned index = 0) { | |
660 if (matchCharacter & ~0xFF) | |
661 return kNotFound; | |
662 return find(characters, length, static_cast<LChar>(matchCharacter), index); | |
663 } | |
664 | |
665 template <typename CharacterType> | |
666 inline size_t find(const CharacterType* characters, | |
667 unsigned length, | |
668 char matchCharacter, | |
669 unsigned index = 0) { | |
670 return find(characters, length, static_cast<LChar>(matchCharacter), index); | |
671 } | |
672 | |
673 inline size_t find(const LChar* characters, | |
674 unsigned length, | |
675 CharacterMatchFunctionPtr matchFunction, | |
676 unsigned index = 0) { | |
677 while (index < length) { | |
678 if (matchFunction(characters[index])) | |
679 return index; | |
680 ++index; | |
681 } | |
682 return kNotFound; | |
683 } | |
684 | |
685 inline size_t find(const UChar* characters, | |
686 unsigned length, | |
687 CharacterMatchFunctionPtr matchFunction, | |
688 unsigned index = 0) { | |
689 while (index < length) { | |
690 if (matchFunction(characters[index])) | |
691 return index; | |
692 ++index; | |
693 } | |
694 return kNotFound; | |
695 } | |
696 | |
697 template <typename CharacterType> | |
698 inline size_t reverseFind(const CharacterType* characters, | |
699 unsigned length, | |
700 CharacterType matchCharacter, | |
701 unsigned index = UINT_MAX) { | |
702 if (!length) | |
703 return kNotFound; | |
704 if (index >= length) | |
705 index = length - 1; | |
706 while (characters[index] != matchCharacter) { | |
707 if (!index--) | |
708 return kNotFound; | |
709 } | |
710 return index; | |
711 } | |
712 | |
713 ALWAYS_INLINE size_t reverseFind(const UChar* characters, | |
714 unsigned length, | |
715 LChar matchCharacter, | |
716 unsigned index = UINT_MAX) { | |
717 return reverseFind(characters, length, static_cast<UChar>(matchCharacter), | |
718 index); | |
719 } | |
720 | |
721 inline size_t reverseFind(const LChar* characters, | |
722 unsigned length, | |
723 UChar matchCharacter, | |
724 unsigned index = UINT_MAX) { | |
725 if (matchCharacter & ~0xFF) | |
726 return kNotFound; | |
727 return reverseFind(characters, length, static_cast<LChar>(matchCharacter), | |
728 index); | |
729 } | |
730 | |
731 inline size_t StringImpl::find(LChar character, unsigned start) { | |
732 if (is8Bit()) | |
733 return WTF::find(characters8(), m_length, character, start); | |
734 return WTF::find(characters16(), m_length, character, start); | |
735 } | |
736 | |
737 ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start) { | |
738 return find(static_cast<LChar>(character), start); | |
739 } | |
740 | |
741 inline size_t StringImpl::find(UChar character, unsigned start) { | |
742 if (is8Bit()) | |
743 return WTF::find(characters8(), m_length, character, start); | |
744 return WTF::find(characters16(), m_length, character, start); | |
745 } | |
746 | |
747 inline unsigned lengthOfNullTerminatedString(const UChar* string) { | |
748 size_t length = 0; | |
749 while (string[length] != UChar(0)) | |
750 ++length; | |
751 RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max()); | |
752 return static_cast<unsigned>(length); | |
753 } | |
754 | |
755 template <size_t inlineCapacity> | |
756 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, | |
757 StringImpl* b) { | |
758 if (!b) | |
759 return !a.size(); | |
760 if (a.size() != b->length()) | |
761 return false; | |
762 if (b->is8Bit()) | |
763 return equal(a.data(), b->characters8(), b->length()); | |
764 return equal(a.data(), b->characters16(), b->length()); | |
765 } | |
766 | |
// Three-way comparison of two character buffers of lengths |l1| and |l2|.
// The first differing character within the common prefix decides the result;
// when the common prefix is identical the shorter buffer orders first.
// Returns -1, 0 or 1.
template <typename CharacterType1, typename CharacterType2>
static inline int codePointCompare(unsigned l1,
                                   unsigned l2,
                                   const CharacterType1* c1,
                                   const CharacterType2* c2) {
  const unsigned commonLength = (l2 < l1) ? l2 : l1;
  for (unsigned i = 0; i < commonLength; ++i) {
    if (c1[i] != c2[i])
      return (c1[i] > c2[i]) ? 1 : -1;
  }

  // Identical over the common prefix: only the lengths can differ.
  if (l1 == l2)
    return 0;
  return (l1 > l2) ? 1 : -1;
}
788 | |
789 static inline int codePointCompare8(const StringImpl* string1, | |
790 const StringImpl* string2) { | |
791 return codePointCompare(string1->length(), string2->length(), | |
792 string1->characters8(), string2->characters8()); | |
793 } | |
794 | |
795 static inline int codePointCompare16(const StringImpl* string1, | |
796 const StringImpl* string2) { | |
797 return codePointCompare(string1->length(), string2->length(), | |
798 string1->characters16(), string2->characters16()); | |
799 } | |
800 | |
801 static inline int codePointCompare8To16(const StringImpl* string1, | |
802 const StringImpl* string2) { | |
803 return codePointCompare(string1->length(), string2->length(), | |
804 string1->characters8(), string2->characters16()); | |
805 } | |
806 | |
807 static inline int codePointCompare(const StringImpl* string1, | |
808 const StringImpl* string2) { | |
809 if (!string1) | |
810 return (string2 && string2->length()) ? -1 : 0; | |
811 | |
812 if (!string2) | |
813 return string1->length() ? 1 : 0; | |
814 | |
815 bool string1Is8Bit = string1->is8Bit(); | |
816 bool string2Is8Bit = string2->is8Bit(); | |
817 if (string1Is8Bit) { | |
818 if (string2Is8Bit) | |
819 return codePointCompare8(string1, string2); | |
820 return codePointCompare8To16(string1, string2); | |
821 } | |
822 if (string2Is8Bit) | |
823 return -codePointCompare8To16(string2, string1); | |
824 return codePointCompare16(string1, string2); | |
825 } | |
826 | |
827 static inline bool isSpaceOrNewline(UChar c) { | |
828 // Use isASCIISpace() for basic Latin-1. | |
829 // This will include newlines, which aren't included in Unicode DirWS. | |
830 return c <= 0x7F | |
831 ? WTF::isASCIISpace(c) | |
832 : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral; | |
833 } | |
834 | |
835 inline PassRefPtr<StringImpl> StringImpl::isolatedCopy() const { | |
836 if (is8Bit()) | |
837 return create(characters8(), m_length); | |
838 return create(characters16(), m_length); | |
839 } | |
840 | |
841 template <typename BufferType> | |
842 inline void StringImpl::appendTo(BufferType& result, | |
843 unsigned start, | |
844 unsigned length) const { | |
845 unsigned numberOfCharactersToCopy = std::min(length, m_length - start); | |
846 if (!numberOfCharactersToCopy) | |
847 return; | |
848 if (is8Bit()) | |
849 result.append(characters8() + start, numberOfCharactersToCopy); | |
850 else | |
851 result.append(characters16() + start, numberOfCharactersToCopy); | |
852 } | |
853 | |
854 template <typename BufferType> | |
855 inline void StringImpl::prependTo(BufferType& result, | |
856 unsigned start, | |
857 unsigned length) const { | |
858 unsigned numberOfCharactersToCopy = std::min(length, m_length - start); | |
859 if (!numberOfCharactersToCopy) | |
860 return; | |
861 if (is8Bit()) | |
862 result.prepend(characters8() + start, numberOfCharactersToCopy); | |
863 else | |
864 result.prepend(characters16() + start, numberOfCharactersToCopy); | |
865 } | |
866 | |
867 // TODO(rob.buis) possibly find a better place for this method. | |
868 // Turns a UChar32 to uppercase based on localeIdentifier. | |
869 WTF_EXPORT UChar32 toUpper(UChar32, const AtomicString& localeIdentifier); | |
870 | |
871 struct StringHash; | |
872 | |
873 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> | |
874 template <typename T> | |
875 struct DefaultHash; | |
876 template <> | |
877 struct DefaultHash<StringImpl*> { | |
878 typedef StringHash Hash; | |
879 }; | |
880 template <> | |
881 struct DefaultHash<RefPtr<StringImpl>> { | |
882 typedef StringHash Hash; | |
883 }; | |
884 | |
885 } // namespace WTF | |
886 | |
887 using WTF::StringImpl; | |
888 using WTF::TextCaseASCIIInsensitive; | |
889 using WTF::TextCaseUnicodeInsensitive; | |
890 using WTF::TextCaseSensitive; | |
891 using WTF::TextCaseSensitivity; | |
892 using WTF::equal; | |
893 using WTF::equalNonNull; | |
894 using WTF::lengthOfNullTerminatedString; | |
895 using WTF::reverseFind; | |
896 | |
897 #endif | |
OLD | NEW |