OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | |
3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights
reserved. | |
4 * Copyright (C) 2009 Google Inc. All rights reserved. | |
5 * | |
6 * This library is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Library General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2 of the License, or (at your option) any later version. | |
10 * | |
11 * This library is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Library General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Library General Public License | |
17 * along with this library; see the file COPYING.LIB. If not, write to | |
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
19 * Boston, MA 02110-1301, USA. | |
20 * | |
21 */ | |
22 | |
23 #ifndef StringImpl_h | |
24 #define StringImpl_h | |
25 | |
26 #include <limits.h> | |
27 #include <wtf/ASCIICType.h> | |
28 #include <wtf/Forward.h> | |
29 #include <wtf/StdLibExtras.h> | |
30 #include <wtf/StringHasher.h> | |
31 #include <wtf/Vector.h> | |
32 #include <wtf/unicode/Unicode.h> | |
33 | |
34 #if USE(CF) | |
35 typedef const struct __CFString * CFStringRef; | |
36 #endif | |
37 | |
38 #ifdef __OBJC__ | |
39 @class NSString; | |
40 #endif | |
41 | |
42 namespace WTF { | |
43 | |
44 struct CStringTranslator; | |
45 template<typename CharacterType> struct HashAndCharactersTranslator; | |
46 struct HashAndUTF8CharactersTranslator; | |
47 struct LCharBufferTranslator; | |
48 struct CharBufferFromLiteralDataTranslator; | |
49 class MemoryObjectInfo; | |
50 struct SubstringTranslator; | |
51 struct UCharBufferTranslator; | |
52 template<typename> class RetainPtr; | |
53 | |
54 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive }; | |
55 | |
56 typedef bool (*CharacterMatchFunctionPtr)(UChar); | |
57 typedef bool (*IsWhiteSpaceFunctionPtr)(UChar); | |
58 | |
59 // Define STRING_STATS to turn on run time statistics of string sizes and memory
usage | |
60 #undef STRING_STATS | |
61 | |
62 #ifdef STRING_STATS | |
63 struct StringStats { | |
64 inline void add8BitString(unsigned length, bool isSubString = false) | |
65 { | |
66 ++m_totalNumberStrings; | |
67 ++m_number8BitStrings; | |
68 if (!isSubString) | |
69 m_total8BitData += length; | |
70 } | |
71 | |
72 inline void add16BitString(unsigned length, bool isSubString = false) | |
73 { | |
74 ++m_totalNumberStrings; | |
75 ++m_number16BitStrings; | |
76 if (!isSubString) | |
77 m_total16BitData += length; | |
78 } | |
79 | |
80 inline void addUpconvertedString(unsigned length) | |
81 { | |
82 ++m_numberUpconvertedStrings; | |
83 m_totalUpconvertedData += length; | |
84 } | |
85 | |
86 void removeString(StringImpl*); | |
87 void printStats(); | |
88 | |
89 static const unsigned s_printStringStatsFrequency = 5000; | |
90 static unsigned s_stringRemovesTillPrintStats; | |
91 | |
92 unsigned m_totalNumberStrings; | |
93 unsigned m_number8BitStrings; | |
94 unsigned m_number16BitStrings; | |
95 unsigned m_numberUpconvertedStrings; | |
96 unsigned long long m_total8BitData; | |
97 unsigned long long m_total16BitData; | |
98 unsigned long long m_totalUpconvertedData; | |
99 }; | |
100 | |
101 #define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitSt
ring(length) | |
102 #define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringSta
ts().add8BitString(length, isSubString) | |
103 #define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16Bit
String(length) | |
104 #define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringSt
ats().add16BitString(length, isSubString) | |
105 #define STRING_STATS_ADD_UPCONVERTED_STRING(length) StringImpl::stringStats().ad
dUpconvertedString(length) | |
106 #define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeStrin
g(string) | |
107 #else | |
108 #define STRING_STATS_ADD_8BIT_STRING(length) ((void)0) | |
109 #define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) ((void)0) | |
110 #define STRING_STATS_ADD_16BIT_STRING(length) ((void)0) | |
111 #define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) ((void)0) | |
112 #define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0) | |
113 #define STRING_STATS_REMOVE_STRING(string) ((void)0) | |
114 #endif | |
115 | |
116 class StringImpl { | |
117 WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED; | |
118 friend struct WTF::CStringTranslator; | |
119 template<typename CharacterType> friend struct WTF::HashAndCharactersTransla
tor; | |
120 friend struct WTF::HashAndUTF8CharactersTranslator; | |
121 friend struct WTF::CharBufferFromLiteralDataTranslator; | |
122 friend struct WTF::LCharBufferTranslator; | |
123 friend struct WTF::SubstringTranslator; | |
124 friend struct WTF::UCharBufferTranslator; | |
125 friend class AtomicStringImpl; | |
126 | |
127 private: | |
128 enum BufferOwnership { | |
129 BufferInternal, | |
130 BufferOwned, | |
131 BufferSubstring, | |
132 // NOTE: Adding more ownership types needs to extend m_hashAndFlags as w
e're at capacity | |
133 }; | |
134 | |
135 // Used to construct static strings, which have an special refCount that can
never hit zero. | |
136 // This means that the static string will never be destroyed, which is impor
tant because | |
137 // static strings will be shared across threads & ref-counted in a non-threa
dsafe manner. | |
138 enum ConstructStaticStringTag { ConstructStaticString }; | |
139 StringImpl(const UChar* characters, unsigned length, ConstructStaticStringTa
g) | |
140 : m_refCount(s_refCountFlagIsStaticString) | |
141 , m_length(length) | |
142 , m_data16(characters) | |
143 , m_buffer(0) | |
144 , m_hashAndFlags(s_hashFlagIsIdentifier | BufferOwned) | |
145 { | |
146 // Ensure that the hash is computed so that AtomicStringHash can call ex
istingHash() | |
147 // with impunity. The empty string is special because it is never entere
d into | |
148 // AtomicString's HashKey, but still needs to compare correctly. | |
149 STRING_STATS_ADD_16BIT_STRING(m_length); | |
150 | |
151 hash(); | |
152 } | |
153 | |
154 // Used to construct static strings, which have an special refCount that can
never hit zero. | |
155 // This means that the static string will never be destroyed, which is impor
tant because | |
156 // static strings will be shared across threads & ref-counted in a non-threa
dsafe manner. | |
157 StringImpl(const LChar* characters, unsigned length, ConstructStaticStringTa
g) | |
158 : m_refCount(s_refCountFlagIsStaticString) | |
159 , m_length(length) | |
160 , m_data8(characters) | |
161 , m_buffer(0) | |
162 , m_hashAndFlags(s_hashFlag8BitBuffer | s_hashFlagIsIdentifier | BufferO
wned) | |
163 { | |
164 // Ensure that the hash is computed so that AtomicStringHash can call ex
istingHash() | |
165 // with impunity. The empty string is special because it is never entere
d into | |
166 // AtomicString's HashKey, but still needs to compare correctly. | |
167 STRING_STATS_ADD_8BIT_STRING(m_length); | |
168 | |
169 hash(); | |
170 } | |
171 | |
172 // FIXME: there has to be a less hacky way to do this. | |
173 enum Force8Bit { Force8BitConstructor }; | |
174 // Create a normal 8-bit string with internal storage (BufferInternal) | |
175 StringImpl(unsigned length, Force8Bit) | |
176 : m_refCount(s_refCountIncrement) | |
177 , m_length(length) | |
178 , m_data8(reinterpret_cast<const LChar*>(this + 1)) | |
179 , m_buffer(0) | |
180 , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal) | |
181 { | |
182 ASSERT(m_data8); | |
183 ASSERT(m_length); | |
184 | |
185 STRING_STATS_ADD_8BIT_STRING(m_length); | |
186 } | |
187 | |
188 // Create a normal 16-bit string with internal storage (BufferInternal) | |
189 StringImpl(unsigned length) | |
190 : m_refCount(s_refCountIncrement) | |
191 , m_length(length) | |
192 , m_data16(reinterpret_cast<const UChar*>(this + 1)) | |
193 , m_buffer(0) | |
194 , m_hashAndFlags(BufferInternal) | |
195 { | |
196 ASSERT(m_data16); | |
197 ASSERT(m_length); | |
198 | |
199 STRING_STATS_ADD_16BIT_STRING(m_length); | |
200 } | |
201 | |
202 // Create a StringImpl adopting ownership of the provided buffer (BufferOwne
d) | |
203 StringImpl(const LChar* characters, unsigned length) | |
204 : m_refCount(s_refCountIncrement) | |
205 , m_length(length) | |
206 , m_data8(characters) | |
207 , m_buffer(0) | |
208 , m_hashAndFlags(s_hashFlag8BitBuffer | BufferOwned) | |
209 { | |
210 ASSERT(m_data8); | |
211 ASSERT(m_length); | |
212 | |
213 STRING_STATS_ADD_8BIT_STRING(m_length); | |
214 } | |
215 | |
216 enum ConstructFromLiteralTag { ConstructFromLiteral }; | |
217 StringImpl(const char* characters, unsigned length, ConstructFromLiteralTag) | |
218 : m_refCount(s_refCountIncrement) | |
219 , m_length(length) | |
220 , m_data8(reinterpret_cast<const LChar*>(characters)) | |
221 , m_buffer(0) | |
222 , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal | s_hashFlagHasTe
rminatingNullCharacter) | |
223 { | |
224 ASSERT(m_data8); | |
225 ASSERT(m_length); | |
226 ASSERT(!characters[length]); | |
227 | |
228 STRING_STATS_ADD_8BIT_STRING(0); | |
229 } | |
230 | |
231 // Create a StringImpl adopting ownership of the provided buffer (BufferOwne
d) | |
232 StringImpl(const UChar* characters, unsigned length) | |
233 : m_refCount(s_refCountIncrement) | |
234 , m_length(length) | |
235 , m_data16(characters) | |
236 , m_buffer(0) | |
237 , m_hashAndFlags(BufferOwned) | |
238 { | |
239 ASSERT(m_data16); | |
240 ASSERT(m_length); | |
241 | |
242 STRING_STATS_ADD_16BIT_STRING(m_length); | |
243 } | |
244 | |
245 // Used to create new strings that are a substring of an existing 8-bit Stri
ngImpl (BufferSubstring) | |
246 StringImpl(const LChar* characters, unsigned length, PassRefPtr<StringImpl>
base) | |
247 : m_refCount(s_refCountIncrement) | |
248 , m_length(length) | |
249 , m_data8(characters) | |
250 , m_substringBuffer(base.leakRef()) | |
251 , m_hashAndFlags(s_hashFlag8BitBuffer | BufferSubstring) | |
252 { | |
253 ASSERT(is8Bit()); | |
254 ASSERT(m_data8); | |
255 ASSERT(m_length); | |
256 ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); | |
257 | |
258 STRING_STATS_ADD_8BIT_STRING2(m_length, true); | |
259 } | |
260 | |
261 // Used to create new strings that are a substring of an existing 16-bit Str
ingImpl (BufferSubstring) | |
262 StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl>
base) | |
263 : m_refCount(s_refCountIncrement) | |
264 , m_length(length) | |
265 , m_data16(characters) | |
266 , m_substringBuffer(base.leakRef()) | |
267 , m_hashAndFlags(BufferSubstring) | |
268 { | |
269 ASSERT(!is8Bit()); | |
270 ASSERT(m_data16); | |
271 ASSERT(m_length); | |
272 ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); | |
273 | |
274 STRING_STATS_ADD_16BIT_STRING2(m_length, true); | |
275 } | |
276 | |
277 enum CreateEmptyUnique_T { CreateEmptyUnique }; | |
278 StringImpl(CreateEmptyUnique_T) | |
279 : m_refCount(s_refCountIncrement) | |
280 , m_length(0) | |
281 , m_data16(reinterpret_cast<const UChar*>(1)) | |
282 , m_buffer(0) | |
283 { | |
284 ASSERT(m_data16); | |
285 // Set the hash early, so that all empty unique StringImpls have a hash, | |
286 // and don't use the normal hashing algorithm - the unique nature of the
se | |
287 // keys means that we don't need them to match any other string (in fact
, | |
288 // that's exactly the oposite of what we want!), and teh normal hash wou
ld | |
289 // lead to lots of conflicts. | |
290 unsigned hash = reinterpret_cast<uintptr_t>(this); | |
291 hash <<= s_flagCount; | |
292 if (!hash) | |
293 hash = 1 << s_flagCount; | |
294 m_hashAndFlags = hash | BufferInternal; | |
295 | |
296 STRING_STATS_ADD_16BIT_STRING(m_length); | |
297 } | |
298 public: | |
299 WTF_EXPORT_STRING_API ~StringImpl(); | |
300 | |
301 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> create(const UChar*, uns
igned length); | |
302 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> create(const LChar*, uns
igned length); | |
303 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> create8BitIfPossible(con
st UChar*, unsigned length); | |
304 template<size_t inlineCapacity> | |
305 static PassRefPtr<StringImpl> create8BitIfPossible(const Vector<UChar, inlin
eCapacity>& vector) | |
306 { | |
307 return create8BitIfPossible(vector.data(), vector.size()); | |
308 } | |
309 | |
310 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s, unsigned l
ength) { return create(reinterpret_cast<const LChar*>(s), length); } | |
311 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> create(const LChar*); | |
312 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s) { return c
reate(reinterpret_cast<const LChar*>(s)); } | |
313 | |
314 static ALWAYS_INLINE PassRefPtr<StringImpl> create8(PassRefPtr<StringImpl> r
ep, unsigned offset, unsigned length) | |
315 { | |
316 ASSERT(rep); | |
317 ASSERT(length <= rep->length()); | |
318 | |
319 if (!length) | |
320 return empty(); | |
321 | |
322 ASSERT(rep->is8Bit()); | |
323 StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep
->m_substringBuffer : rep.get(); | |
324 return adoptRef(new StringImpl(rep->m_data8 + offset, length, ownerRep))
; | |
325 } | |
326 | |
327 static ALWAYS_INLINE PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> re
p, unsigned offset, unsigned length) | |
328 { | |
329 ASSERT(rep); | |
330 ASSERT(length <= rep->length()); | |
331 | |
332 if (!length) | |
333 return empty(); | |
334 | |
335 StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep
->m_substringBuffer : rep.get(); | |
336 if (rep->is8Bit()) | |
337 return adoptRef(new StringImpl(rep->m_data8 + offset, length, ownerR
ep)); | |
338 return adoptRef(new StringImpl(rep->m_data16 + offset, length, ownerRep)
); | |
339 } | |
340 | |
341 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> createFromLiteral(const
char* characters, unsigned length); | |
342 template<unsigned charactersCount> | |
343 ALWAYS_INLINE static PassRefPtr<StringImpl> createFromLiteral(const char (&c
haracters)[charactersCount]) | |
344 { | |
345 COMPILE_ASSERT(charactersCount > 1, StringImplFromLiteralNotEmpty); | |
346 COMPILE_ASSERT((charactersCount - 1 <= ((unsigned(~0) - sizeof(StringImp
l)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow); | |
347 | |
348 return createFromLiteral(characters, charactersCount - 1); | |
349 } | |
350 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> createFromLiteral(const
char* characters); | |
351 | |
352 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> createUninitialized(unsi
gned length, LChar*& data); | |
353 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> createUninitialized(unsi
gned length, UChar*& data); | |
354 template <typename T> static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateU
ninitialized(unsigned length, T*& output) | |
355 { | |
356 if (!length) { | |
357 output = 0; | |
358 return empty(); | |
359 } | |
360 | |
361 if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)
) / sizeof(T))) { | |
362 output = 0; | |
363 return 0; | |
364 } | |
365 StringImpl* resultImpl; | |
366 if (!tryFastMalloc(sizeof(T) * length + sizeof(StringImpl)).getValue(res
ultImpl)) { | |
367 output = 0; | |
368 return 0; | |
369 } | |
370 output = reinterpret_cast<T*>(resultImpl + 1); | |
371 | |
372 if (sizeof(T) == sizeof(char)) | |
373 return adoptRef(new (NotNull, resultImpl) StringImpl(length, Force8B
itConstructor)); | |
374 | |
375 return adoptRef(new (NotNull, resultImpl) StringImpl(length)); | |
376 } | |
377 | |
378 static PassRefPtr<StringImpl> createEmptyUnique() | |
379 { | |
380 return adoptRef(new StringImpl(CreateEmptyUnique)); | |
381 } | |
382 | |
383 // Reallocate the StringImpl. The originalString must be only owned by the P
assRefPtr, | |
384 // and the buffer ownership must be BufferInternal. Just like the input poin
ter of realloc(), | |
385 // the originalString can't be used after this function. | |
386 static PassRefPtr<StringImpl> reallocate(PassRefPtr<StringImpl> originalStri
ng, unsigned length, LChar*& data); | |
387 static PassRefPtr<StringImpl> reallocate(PassRefPtr<StringImpl> originalStri
ng, unsigned length, UChar*& data); | |
388 | |
389 static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAnd
Flags); } | |
390 static unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; } | |
391 static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8);
} | |
392 static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const Strin
gImpl&); | |
393 | |
394 template<typename CharType, size_t inlineCapacity> | |
395 static PassRefPtr<StringImpl> adopt(Vector<CharType, inlineCapacity>& vector
) | |
396 { | |
397 if (size_t size = vector.size()) { | |
398 ASSERT(vector.data()); | |
399 RELEASE_ASSERT(size <= std::numeric_limits<unsigned>::max()); | |
400 return adoptRef(new StringImpl(vector.releaseBuffer(), size)); | |
401 } | |
402 return empty(); | |
403 } | |
404 | |
405 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> adopt(StringBuffer<UChar
>&); | |
406 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> adopt(StringBuffer<LChar
>&); | |
407 | |
408 unsigned length() const { return m_length; } | |
409 bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; } | |
410 bool hasInternalBuffer() const { return bufferOwnership() == BufferInternal;
} | |
411 bool hasOwnedBuffer() const { return bufferOwnership() == BufferOwned; } | |
412 StringImpl* baseString() const { return bufferOwnership() == BufferSubstring
? m_substringBuffer : 0; } | |
413 | |
414 // FIXME: Remove all unnecessary usages of characters() | |
415 ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_
data8; } | |
416 ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return
m_data16; } | |
417 ALWAYS_INLINE const UChar* characters() const | |
418 { | |
419 if (!is8Bit()) | |
420 return m_data16; | |
421 | |
422 return getData16SlowCase(); | |
423 } | |
424 | |
425 template <typename CharType> | |
426 ALWAYS_INLINE const CharType * getCharacters() const; | |
427 | |
428 size_t cost() | |
429 { | |
430 // For substrings, return the cost of the base string. | |
431 if (bufferOwnership() == BufferSubstring) | |
432 return m_substringBuffer->cost(); | |
433 | |
434 if (m_hashAndFlags & s_hashFlagDidReportCost) | |
435 return 0; | |
436 | |
437 m_hashAndFlags |= s_hashFlagDidReportCost; | |
438 return m_length; | |
439 } | |
440 | |
441 WTF_EXPORT_STRING_API size_t sizeInBytes() const; | |
442 | |
443 bool has16BitShadow() const { return m_hashAndFlags & s_hashFlagHas16BitShad
ow; } | |
444 WTF_EXPORT_STRING_API void upconvertCharacters(unsigned, unsigned) const; | |
445 bool isIdentifier() const { return m_hashAndFlags & s_hashFlagIsIdentifier;
} | |
446 void setIsIdentifier(bool isIdentifier) | |
447 { | |
448 ASSERT(!isStatic()); | |
449 if (isIdentifier) | |
450 m_hashAndFlags |= s_hashFlagIsIdentifier; | |
451 else | |
452 m_hashAndFlags &= ~s_hashFlagIsIdentifier; | |
453 } | |
454 | |
455 bool isEmptyUnique() const | |
456 { | |
457 return !length() && !isStatic(); | |
458 } | |
459 | |
460 bool hasTerminatingNullCharacter() const { return m_hashAndFlags & s_hashFla
gHasTerminatingNullCharacter; } | |
461 | |
462 bool isAtomic() const { return m_hashAndFlags & s_hashFlagIsAtomic; } | |
463 void setIsAtomic(bool isAtomic) | |
464 { | |
465 if (isAtomic) | |
466 m_hashAndFlags |= s_hashFlagIsAtomic; | |
467 else | |
468 m_hashAndFlags &= ~s_hashFlagIsAtomic; | |
469 } | |
470 | |
471 bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; } | |
472 | |
473 #ifdef STRING_STATS | |
474 bool isSubString() const { return bufferOwnership() == BufferSubstring; } | |
475 #endif | |
476 | |
477 private: | |
478 // The high bits of 'hash' are always empty, but we prefer to store our flag
s | |
479 // in the low bits because it makes them slightly more efficient to access. | |
480 // So, we shift left and right when setting and getting our hash code. | |
481 void setHash(unsigned hash) const | |
482 { | |
483 ASSERT(!hasHash()); | |
484 // Multiple clients assume that StringHasher is the canonical string has
h function. | |
485 ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(m_da
ta8, m_length) : StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length))); | |
486 ASSERT(!(hash & (s_flagMask << (8 * sizeof(hash) - s_flagCount)))); // V
erify that enough high bits are empty. | |
487 | |
488 hash <<= s_flagCount; | |
489 ASSERT(!(hash & m_hashAndFlags)); // Verify that enough low bits are emp
ty after shift. | |
490 ASSERT(hash); // Verify that 0 is a valid sentinel hash value. | |
491 | |
492 m_hashAndFlags |= hash; // Store hash with flags in low bits. | |
493 } | |
494 | |
495 unsigned rawHash() const | |
496 { | |
497 return m_hashAndFlags >> s_flagCount; | |
498 } | |
499 | |
500 public: | |
501 bool hasHash() const | |
502 { | |
503 return rawHash() != 0; | |
504 } | |
505 | |
506 unsigned existingHash() const | |
507 { | |
508 ASSERT(hasHash()); | |
509 return rawHash(); | |
510 } | |
511 | |
512 unsigned hash() const | |
513 { | |
514 if (hasHash()) | |
515 return existingHash(); | |
516 return hashSlowCase(); | |
517 } | |
518 | |
519 inline bool hasOneRef() const | |
520 { | |
521 return m_refCount == s_refCountIncrement; | |
522 } | |
523 | |
524 inline void ref() | |
525 { | |
526 m_refCount += s_refCountIncrement; | |
527 } | |
528 | |
529 inline void deref() | |
530 { | |
531 if (m_refCount == s_refCountIncrement) { | |
532 delete this; | |
533 return; | |
534 } | |
535 | |
536 m_refCount -= s_refCountIncrement; | |
537 } | |
538 | |
539 WTF_EXPORT_PRIVATE static StringImpl* empty(); | |
540 | |
541 // FIXME: Does this really belong in StringImpl? | |
542 template <typename T> static void copyChars(T* destination, const T* source,
unsigned numCharacters) | |
543 { | |
544 if (numCharacters == 1) { | |
545 *destination = *source; | |
546 return; | |
547 } | |
548 | |
549 if (numCharacters <= s_copyCharsInlineCutOff) { | |
550 unsigned i = 0; | |
551 #if (CPU(X86) || CPU(X86_64)) | |
552 const unsigned charsPerInt = sizeof(uint32_t) / sizeof(T); | |
553 | |
554 if (numCharacters > charsPerInt) { | |
555 unsigned stopCount = numCharacters & ~(charsPerInt - 1); | |
556 | |
557 const uint32_t* srcCharacters = reinterpret_cast<const uint32_t*
>(source); | |
558 uint32_t* destCharacters = reinterpret_cast<uint32_t*>(destinati
on); | |
559 for (unsigned j = 0; i < stopCount; i += charsPerInt, ++j) | |
560 destCharacters[j] = srcCharacters[j]; | |
561 } | |
562 #endif | |
563 for (; i < numCharacters; ++i) | |
564 destination[i] = source[i]; | |
565 } else | |
566 memcpy(destination, source, numCharacters * sizeof(T)); | |
567 } | |
568 | |
569 ALWAYS_INLINE static void copyChars(UChar* destination, const LChar* source,
unsigned numCharacters) | |
570 { | |
571 for (unsigned i = 0; i < numCharacters; ++i) | |
572 destination[i] = source[i]; | |
573 } | |
574 | |
575 // Some string features, like refcounting and the atomicity flag, are not | |
576 // thread-safe. We achieve thread safety by isolation, giving each thread | |
577 // its own copy of the string. | |
578 PassRefPtr<StringImpl> isolatedCopy() const; | |
579 | |
580 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> substring(unsigned pos, unsigne
d len = UINT_MAX); | |
581 | |
582 UChar operator[](unsigned i) const | |
583 { | |
584 ASSERT_WITH_SECURITY_IMPLICATION(i < m_length); | |
585 if (is8Bit()) | |
586 return m_data8[i]; | |
587 return m_data16[i]; | |
588 } | |
589 WTF_EXPORT_STRING_API UChar32 characterStartingAt(unsigned); | |
590 | |
591 WTF_EXPORT_STRING_API bool containsOnlyWhitespace(); | |
592 | |
593 int toIntStrict(bool* ok = 0, int base = 10); | |
594 unsigned toUIntStrict(bool* ok = 0, int base = 10); | |
595 int64_t toInt64Strict(bool* ok = 0, int base = 10); | |
596 uint64_t toUInt64Strict(bool* ok = 0, int base = 10); | |
597 intptr_t toIntPtrStrict(bool* ok = 0, int base = 10); | |
598 | |
599 WTF_EXPORT_STRING_API int toInt(bool* ok = 0); // ignores trailing garbage | |
600 unsigned toUInt(bool* ok = 0); // ignores trailing garbage | |
601 int64_t toInt64(bool* ok = 0); // ignores trailing garbage | |
602 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage | |
603 intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage | |
604 | |
605 // FIXME: Like the strict functions above, these give false for "ok" when th
ere is trailing garbage. | |
606 // Like the non-strict functions above, these return the value when there is
trailing garbage. | |
607 // It would be better if these were more consistent with the above functions
instead. | |
608 double toDouble(bool* ok = 0); | |
609 float toFloat(bool* ok = 0); | |
610 | |
611 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> lower(); | |
612 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> upper(); | |
613 | |
614 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> fill(UChar); | |
615 // FIXME: Do we need fill(char) or can we just do the right thing if UChar i
s ASCII? | |
616 PassRefPtr<StringImpl> foldCase(); | |
617 | |
618 PassRefPtr<StringImpl> stripWhiteSpace(); | |
619 PassRefPtr<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr); | |
620 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> simplifyWhiteSpace(); | |
621 PassRefPtr<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr); | |
622 | |
623 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); | |
624 template <typename CharType> | |
625 ALWAYS_INLINE PassRefPtr<StringImpl> removeCharacters(const CharType* charac
ters, CharacterMatchFunctionPtr); | |
626 | |
627 size_t find(LChar character, unsigned start = 0); | |
628 size_t find(char character, unsigned start = 0); | |
629 size_t find(UChar character, unsigned start = 0); | |
630 WTF_EXPORT_STRING_API size_t find(CharacterMatchFunctionPtr, unsigned index
= 0); | |
631 size_t find(const LChar*, unsigned index = 0); | |
632 ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) { return find(r
einterpret_cast<const LChar*>(s), index); } | |
633 WTF_EXPORT_STRING_API size_t find(StringImpl*); | |
634 WTF_EXPORT_STRING_API size_t find(StringImpl*, unsigned index); | |
635 size_t findIgnoringCase(const LChar*, unsigned index = 0); | |
636 ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { r
eturn findIgnoringCase(reinterpret_cast<const LChar*>(s), index); } | |
637 WTF_EXPORT_STRING_API size_t findIgnoringCase(StringImpl*, unsigned index =
0); | |
638 | |
639 WTF_EXPORT_STRING_API size_t findNextLineStart(unsigned index = UINT_MAX); | |
640 | |
641 WTF_EXPORT_STRING_API size_t reverseFind(UChar, unsigned index = UINT_MAX); | |
642 WTF_EXPORT_STRING_API size_t reverseFind(StringImpl*, unsigned index = UINT_
MAX); | |
643 WTF_EXPORT_STRING_API size_t reverseFindIgnoringCase(StringImpl*, unsigned i
ndex = UINT_MAX); | |
644 | |
645 bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSe
nsitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; } | |
646 WTF_EXPORT_STRING_API bool startsWith(UChar) const; | |
647 WTF_EXPORT_STRING_API bool startsWith(const char*, unsigned matchLength, boo
l caseSensitive) const; | |
648 template<unsigned matchLength> | |
649 bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true
) const { return startsWith(prefix, matchLength - 1, caseSensitive); } | |
650 | |
651 WTF_EXPORT_STRING_API bool endsWith(StringImpl*, bool caseSensitive = true); | |
652 WTF_EXPORT_STRING_API bool endsWith(UChar) const; | |
653 WTF_EXPORT_STRING_API bool endsWith(const char*, unsigned matchLength, bool
caseSensitive) const; | |
654 template<unsigned matchLength> | |
655 bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true)
const { return endsWith(prefix, matchLength - 1, caseSensitive); } | |
656 | |
657 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> replace(UChar, UChar); | |
658 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> replace(UChar, StringImpl*); | |
659 ALWAYS_INLINE PassRefPtr<StringImpl> replace(UChar pattern, const char* repl
acement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<
const LChar*>(replacement), replacementLength); } | |
660 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> replace(UChar, const LChar*, un
signed replacementLength); | |
661 PassRefPtr<StringImpl> replace(UChar, const UChar*, unsigned replacementLeng
th); | |
662 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> replace(StringImpl*, StringImpl
*); | |
663 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> replace(unsigned index, unsigne
d len, StringImpl*); | |
664 | |
665 WTF_EXPORT_STRING_API WTF::Unicode::Direction defaultWritingDirection(bool*
hasStrongDirectionality = 0); | |
666 | |
667 #if USE(CF) | |
668 RetainPtr<CFStringRef> createCFString(); | |
669 #endif | |
670 #ifdef __OBJC__ | |
671 operator NSString*(); | |
672 #endif | |
673 | |
674 #ifdef STRING_STATS | |
675 ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; } | |
676 #endif | |
677 | |
678 private: | |
679 | |
680 bool isASCIILiteral() const | |
681 { | |
682 return is8Bit() && hasInternalBuffer() && reinterpret_cast<const void*>(
m_data8) != reinterpret_cast<const void*>(this + 1); | |
683 } | |
684 | |
685 // This number must be at least 2 to avoid sharing empty, null as well as 1
character strings from SmallStrings. | |
686 static const unsigned s_copyCharsInlineCutOff = 20; | |
687 | |
688 BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership
>(m_hashAndFlags & s_hashMaskBufferOwnership); } | |
689 template <class UCharPredicate> PassRefPtr<StringImpl> stripMatchedCharacter
s(UCharPredicate); | |
690 template <typename CharType, class UCharPredicate> PassRefPtr<StringImpl> si
mplifyMatchedCharactersToSpace(UCharPredicate); | |
691 WTF_EXPORT_STRING_API NEVER_INLINE const UChar* getData16SlowCase() const; | |
692 WTF_EXPORT_PRIVATE NEVER_INLINE unsigned hashSlowCase() const; | |
693 | |
694 // The bottom bit in the ref count indicates a static (immortal) string. | |
695 static const unsigned s_refCountFlagIsStaticString = 0x1; | |
696 static const unsigned s_refCountIncrement = 0x2; // This allows us to ref /
deref without disturbing the static string flag. | |
697 | |
698 // The bottom 8 bits in the hash are flags. | |
699 static const unsigned s_flagCount = 8; | |
700 static const unsigned s_flagMask = (1u << s_flagCount) - 1; | |
701 COMPILE_ASSERT(s_flagCount == StringHasher::flagCount, StringHasher_reserves
_enough_bits_for_StringImpl_flags); | |
702 | |
703 static const unsigned s_hashFlagHas16BitShadow = 1u << 7; | |
704 static const unsigned s_hashFlag8BitBuffer = 1u << 6; | |
705 static const unsigned s_hashFlagHasTerminatingNullCharacter = 1u << 5; | |
706 static const unsigned s_hashFlagIsAtomic = 1u << 4; | |
707 static const unsigned s_hashFlagDidReportCost = 1u << 3; | |
708 static const unsigned s_hashFlagIsIdentifier = 1u << 2; | |
709 static const unsigned s_hashMaskBufferOwnership = 1u | (1u << 1); | |
710 | |
711 #ifdef STRING_STATS | |
712 WTF_EXPORTDATA static StringStats m_stringStats; | |
713 #endif | |
714 | |
715 public: | |
716 struct StaticASCIILiteral { | |
717 // These member variables must match the layout of StringImpl. | |
718 unsigned m_refCount; | |
719 unsigned m_length; | |
720 const LChar* m_data8; | |
721 const UChar* m_copyData16; | |
722 unsigned m_hashAndFlags; | |
723 | |
724 static const unsigned s_initialRefCount = s_refCountFlagIsStaticString; | |
725 static const unsigned s_initialFlags = s_hashFlag8BitBuffer | s_hashFlag
Has16BitShadow | BufferInternal | s_hashFlagHasTerminatingNullCharacter; | |
726 static const unsigned s_hashShift = s_flagCount; | |
727 }; | |
728 | |
729 #ifndef NDEBUG | |
730 void assertHashIsCorrect() | |
731 { | |
732 ASSERT(hasHash()); | |
733 ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(charac
ters8(), length())); | |
734 } | |
735 #endif | |
736 | |
737 private: | |
738 // These member variables must match the layout of StaticASCIILiteral. | |
739 unsigned m_refCount; | |
740 unsigned m_length; | |
741 union { | |
742 const LChar* m_data8; | |
743 const UChar* m_data16; | |
744 }; | |
745 union { | |
746 void* m_buffer; | |
747 StringImpl* m_substringBuffer; | |
748 mutable UChar* m_copyData16; | |
749 }; | |
750 mutable unsigned m_hashAndFlags; | |
751 }; | |
752 | |
753 COMPILE_ASSERT(sizeof(StringImpl) == sizeof(StringImpl::StaticASCIILiteral), Str
ingImpl_should_match_its_StaticASCIILiteral); | |
754 | |
755 #if !ASSERT_DISABLED | |
756 // StringImpls created from StaticASCIILiteral will ASSERT | |
757 // in the generic ValueCheck<T>::checkConsistency | |
758 // as they are not allocated by fastMalloc. | |
759 // We don't currently have any way to detect that case | |
760 // so we ignore the consistency check for all StringImpl*. | |
761 template<> struct | |
762 ValueCheck<StringImpl*> { | |
763 static void checkConsistency(const StringImpl*) { } | |
764 }; | |
765 #endif | |
766 | |
767 template <> | |
768 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { return cha
racters8(); } | |
769 | |
770 template <> | |
771 ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { return cha
racters(); } | |
772 | |
773 WTF_EXPORT_STRING_API bool equal(const StringImpl*, const StringImpl*); | |
774 WTF_EXPORT_STRING_API bool equal(const StringImpl*, const LChar*); | |
775 inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterp
ret_cast<const LChar*>(b)); } | |
776 WTF_EXPORT_STRING_API bool equal(const StringImpl*, const LChar*, unsigned); | |
777 inline bool equal(const StringImpl* a, const char* b, unsigned length) { return
equal(a, reinterpret_cast<const LChar*>(b), length); } | |
778 inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); } | |
779 inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_ca
st<const LChar*>(a)); } | |
780 WTF_EXPORT_STRING_API bool equal(const StringImpl*, const UChar*, unsigned); | |
781 WTF_EXPORT_STRING_API bool equalNonNull(const StringImpl* a, const StringImpl* b
); | |
782 | |
783 // Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe. | |
784 #if CPU(X86_64) | |
785 ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) | |
786 { | |
787 unsigned dwordLength = length >> 3; | |
788 | |
789 if (dwordLength) { | |
790 const uint64_t* aDWordCharacters = reinterpret_cast<const uint64_t*>(a); | |
791 const uint64_t* bDWordCharacters = reinterpret_cast<const uint64_t*>(b); | |
792 | |
793 for (unsigned i = 0; i != dwordLength; ++i) { | |
794 if (*aDWordCharacters++ != *bDWordCharacters++) | |
795 return false; | |
796 } | |
797 | |
798 a = reinterpret_cast<const LChar*>(aDWordCharacters); | |
799 b = reinterpret_cast<const LChar*>(bDWordCharacters); | |
800 } | |
801 | |
802 if (length & 4) { | |
803 if (*reinterpret_cast<const uint32_t*>(a) != *reinterpret_cast<const uin
t32_t*>(b)) | |
804 return false; | |
805 | |
806 a += 4; | |
807 b += 4; | |
808 } | |
809 | |
810 if (length & 2) { | |
811 if (*reinterpret_cast<const uint16_t*>(a) != *reinterpret_cast<const uin
t16_t*>(b)) | |
812 return false; | |
813 | |
814 a += 2; | |
815 b += 2; | |
816 } | |
817 | |
818 if (length & 1 && (*a != *b)) | |
819 return false; | |
820 | |
821 return true; | |
822 } | |
823 | |
824 ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) | |
825 { | |
826 unsigned dwordLength = length >> 2; | |
827 | |
828 if (dwordLength) { | |
829 const uint64_t* aDWordCharacters = reinterpret_cast<const uint64_t*>(a); | |
830 const uint64_t* bDWordCharacters = reinterpret_cast<const uint64_t*>(b); | |
831 | |
832 for (unsigned i = 0; i != dwordLength; ++i) { | |
833 if (*aDWordCharacters++ != *bDWordCharacters++) | |
834 return false; | |
835 } | |
836 | |
837 a = reinterpret_cast<const UChar*>(aDWordCharacters); | |
838 b = reinterpret_cast<const UChar*>(bDWordCharacters); | |
839 } | |
840 | |
841 if (length & 2) { | |
842 if (*reinterpret_cast<const uint32_t*>(a) != *reinterpret_cast<const uin
t32_t*>(b)) | |
843 return false; | |
844 | |
845 a += 2; | |
846 b += 2; | |
847 } | |
848 | |
849 if (length & 1 && (*a != *b)) | |
850 return false; | |
851 | |
852 return true; | |
853 } | |
854 #elif CPU(X86) | |
855 ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) | |
856 { | |
857 const uint32_t* aCharacters = reinterpret_cast<const uint32_t*>(a); | |
858 const uint32_t* bCharacters = reinterpret_cast<const uint32_t*>(b); | |
859 | |
860 unsigned wordLength = length >> 2; | |
861 for (unsigned i = 0; i != wordLength; ++i) { | |
862 if (*aCharacters++ != *bCharacters++) | |
863 return false; | |
864 } | |
865 | |
866 length &= 3; | |
867 | |
868 if (length) { | |
869 const LChar* aRemainder = reinterpret_cast<const LChar*>(aCharacters); | |
870 const LChar* bRemainder = reinterpret_cast<const LChar*>(bCharacters); | |
871 | |
872 for (unsigned i = 0; i < length; ++i) { | |
873 if (aRemainder[i] != bRemainder[i]) | |
874 return false; | |
875 } | |
876 } | |
877 | |
878 return true; | |
879 } | |
880 | |
881 ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) | |
882 { | |
883 const uint32_t* aCharacters = reinterpret_cast<const uint32_t*>(a); | |
884 const uint32_t* bCharacters = reinterpret_cast<const uint32_t*>(b); | |
885 | |
886 unsigned wordLength = length >> 1; | |
887 for (unsigned i = 0; i != wordLength; ++i) { | |
888 if (*aCharacters++ != *bCharacters++) | |
889 return false; | |
890 } | |
891 | |
892 if (length & 1 && *reinterpret_cast<const UChar*>(aCharacters) != *reinterpr
et_cast<const UChar*>(bCharacters)) | |
893 return false; | |
894 | |
895 return true; | |
896 } | |
897 #else | |
898 ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) | |
899 { | |
900 for (unsigned i = 0; i != length; ++i) { | |
901 if (a[i] != b[i]) | |
902 return false; | |
903 } | |
904 | |
905 return true; | |
906 } | |
907 | |
908 ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) | |
909 { | |
910 for (unsigned i = 0; i != length; ++i) { | |
911 if (a[i] != b[i]) | |
912 return false; | |
913 } | |
914 | |
915 return true; | |
916 } | |
917 #endif | |
918 | |
919 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) | |
920 { | |
921 for (unsigned i = 0; i != length; ++i) { | |
922 if (a[i] != b[i]) | |
923 return false; | |
924 } | |
925 | |
926 return true; | |
927 } | |
928 | |
929 ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) | |
930 { | |
931 for (unsigned i = 0; i != length; ++i) { | |
932 if (a[i] != b[i]) | |
933 return false; | |
934 } | |
935 | |
936 return true; | |
937 } | |
938 | |
939 WTF_EXPORT_STRING_API bool equalIgnoringCase(const StringImpl*, const StringImpl
*); | |
940 WTF_EXPORT_STRING_API bool equalIgnoringCase(const StringImpl*, const LChar*); | |
941 inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { return equa
lIgnoringCase(b, a); } | |
942 WTF_EXPORT_STRING_API bool equalIgnoringCase(const LChar*, const LChar*, unsigne
d); | |
943 WTF_EXPORT_STRING_API bool equalIgnoringCase(const UChar*, const LChar*, unsigne
d); | |
944 inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) {
return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); } | |
945 inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) {
return equalIgnoringCase(b, a, length); } | |
946 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) {
return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } | |
947 inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) {
return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } | |
948 inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) | |
949 { | |
950 ASSERT(length >= 0); | |
951 return !Unicode::umemcasecmp(a, b, length); | |
952 } | |
953 WTF_EXPORT_STRING_API bool equalIgnoringCaseNonNull(const StringImpl*, const Str
ingImpl*); | |
954 | |
955 WTF_EXPORT_STRING_API bool equalIgnoringNullity(StringImpl*, StringImpl*); | |
956 | |
957 template<typename CharacterType> | |
958 inline size_t find(const CharacterType* characters, unsigned length, CharacterTy
pe matchCharacter, unsigned index = 0) | |
959 { | |
960 while (index < length) { | |
961 if (characters[index] == matchCharacter) | |
962 return index; | |
963 ++index; | |
964 } | |
965 return notFound; | |
966 } | |
967 | |
968 ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchC
haracter, unsigned index = 0) | |
969 { | |
970 return find(characters, length, static_cast<UChar>(matchCharacter), index); | |
971 } | |
972 | |
973 inline size_t find(const LChar* characters, unsigned length, UChar matchCharacte
r, unsigned index = 0) | |
974 { | |
975 if (matchCharacter & ~0xFF) | |
976 return notFound; | |
977 return find(characters, length, static_cast<LChar>(matchCharacter), index); | |
978 } | |
979 | |
980 inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunct
ionPtr matchFunction, unsigned index = 0) | |
981 { | |
982 while (index < length) { | |
983 if (matchFunction(characters[index])) | |
984 return index; | |
985 ++index; | |
986 } | |
987 return notFound; | |
988 } | |
989 | |
990 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunct
ionPtr matchFunction, unsigned index = 0) | |
991 { | |
992 while (index < length) { | |
993 if (matchFunction(characters[index])) | |
994 return index; | |
995 ++index; | |
996 } | |
997 return notFound; | |
998 } | |
999 | |
1000 template<typename CharacterType> | |
1001 inline size_t findNextLineStart(const CharacterType* characters, unsigned length
, unsigned index = 0) | |
1002 { | |
1003 while (index < length) { | |
1004 CharacterType c = characters[index++]; | |
1005 if ((c != '\n') && (c != '\r')) | |
1006 continue; | |
1007 | |
1008 // There can only be a start of a new line if there are more characters | |
1009 // beyond the current character. | |
1010 if (index < length) { | |
1011 // The 3 common types of line terminators are 1. \r\n (Windows), | |
1012 // 2. \r (old MacOS) and 3. \n (Unix'es). | |
1013 | |
1014 if (c == '\n') | |
1015 return index; // Case 3: just \n. | |
1016 | |
1017 CharacterType c2 = characters[index]; | |
1018 if (c2 != '\n') | |
1019 return index; // Case 2: just \r. | |
1020 | |
1021 // Case 1: \r\n. | |
1022 // But, there's only a start of a new line if there are more | |
1023 // characters beyond the \r\n. | |
1024 if (++index < length) | |
1025 return index; | |
1026 } | |
1027 } | |
1028 return notFound; | |
1029 } | |
1030 | |
1031 template<typename CharacterType> | |
1032 inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigne
d length, unsigned index = UINT_MAX) | |
1033 { | |
1034 if (!length) | |
1035 return notFound; | |
1036 if (index >= length) | |
1037 index = length - 1; | |
1038 CharacterType c = characters[index]; | |
1039 while ((c != '\n') && (c != '\r')) { | |
1040 if (!index--) | |
1041 return notFound; | |
1042 c = characters[index]; | |
1043 } | |
1044 return index; | |
1045 } | |
1046 | |
1047 template<typename CharacterType> | |
1048 inline size_t reverseFind(const CharacterType* characters, unsigned length, Char
acterType matchCharacter, unsigned index = UINT_MAX) | |
1049 { | |
1050 if (!length) | |
1051 return notFound; | |
1052 if (index >= length) | |
1053 index = length - 1; | |
1054 while (characters[index] != matchCharacter) { | |
1055 if (!index--) | |
1056 return notFound; | |
1057 } | |
1058 return index; | |
1059 } | |
1060 | |
1061 ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar
matchCharacter, unsigned index = UINT_MAX) | |
1062 { | |
1063 return reverseFind(characters, length, static_cast<UChar>(matchCharacter), i
ndex); | |
1064 } | |
1065 | |
1066 inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchC
haracter, unsigned index = UINT_MAX) | |
1067 { | |
1068 if (matchCharacter & ~0xFF) | |
1069 return notFound; | |
1070 return reverseFind(characters, length, static_cast<LChar>(matchCharacter), i
ndex); | |
1071 } | |
1072 | |
1073 inline size_t StringImpl::find(LChar character, unsigned start) | |
1074 { | |
1075 if (is8Bit()) | |
1076 return WTF::find(characters8(), m_length, character, start); | |
1077 return WTF::find(characters16(), m_length, character, start); | |
1078 } | |
1079 | |
1080 ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start) | |
1081 { | |
1082 return find(static_cast<LChar>(character), start); | |
1083 } | |
1084 | |
1085 inline size_t StringImpl::find(UChar character, unsigned start) | |
1086 { | |
1087 if (is8Bit()) | |
1088 return WTF::find(characters8(), m_length, character, start); | |
1089 return WTF::find(characters16(), m_length, character, start); | |
1090 } | |
1091 | |
1092 template<size_t inlineCapacity> | |
1093 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b) | |
1094 { | |
1095 if (!b) | |
1096 return !a.size(); | |
1097 if (a.size() != b->length()) | |
1098 return false; | |
1099 return !memcmp(a.data(), b->characters(), b->length() * sizeof(UChar)); | |
1100 } | |
1101 | |
// Compares two character buffers element by element.
//
// l1 / l2 are the lengths of c1 / c2. Returns -1, 0 or 1: the sign of the
// first differing pair of characters, or, when one buffer is a prefix of the
// other, the shorter buffer orders first.
template<typename CharacterType1, typename CharacterType2>
static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2)
{
    const unsigned commonLength = l1 < l2 ? l1 : l2;

    for (unsigned i = 0; i < commonLength; ++i) {
        if (c1[i] != c2[i])
            return c1[i] > c2[i] ? 1 : -1;
    }

    // The common prefix is identical, so the lengths decide.
    if (l1 == l2)
        return 0;
    return l1 > l2 ? 1 : -1;
}
1121 | |
1122 static inline int codePointCompare8(const StringImpl* string1, const StringImpl*
string2) | |
1123 { | |
1124 return codePointCompare(string1->length(), string2->length(), string1->chara
cters8(), string2->characters8()); | |
1125 } | |
1126 | |
1127 static inline int codePointCompare16(const StringImpl* string1, const StringImpl
* string2) | |
1128 { | |
1129 return codePointCompare(string1->length(), string2->length(), string1->chara
cters16(), string2->characters16()); | |
1130 } | |
1131 | |
1132 static inline int codePointCompare8To16(const StringImpl* string1, const StringI
mpl* string2) | |
1133 { | |
1134 return codePointCompare(string1->length(), string2->length(), string1->chara
cters8(), string2->characters16()); | |
1135 } | |
1136 | |
1137 static inline int codePointCompare(const StringImpl* string1, const StringImpl*
string2) | |
1138 { | |
1139 if (!string1) | |
1140 return (string2 && string2->length()) ? -1 : 0; | |
1141 | |
1142 if (!string2) | |
1143 return string1->length() ? 1 : 0; | |
1144 | |
1145 bool string1Is8Bit = string1->is8Bit(); | |
1146 bool string2Is8Bit = string2->is8Bit(); | |
1147 if (string1Is8Bit) { | |
1148 if (string2Is8Bit) | |
1149 return codePointCompare8(string1, string2); | |
1150 return codePointCompare8To16(string1, string2); | |
1151 } | |
1152 if (string2Is8Bit) | |
1153 return -codePointCompare8To16(string2, string1); | |
1154 return codePointCompare16(string1, string2); | |
1155 } | |
1156 | |
1157 static inline bool isSpaceOrNewline(UChar c) | |
1158 { | |
1159 // Use isASCIISpace() for basic Latin-1. | |
1160 // This will include newlines, which aren't included in Unicode DirWS. | |
1161 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF:
:Unicode::WhiteSpaceNeutral; | |
1162 } | |
1163 | |
1164 inline PassRefPtr<StringImpl> StringImpl::isolatedCopy() const | |
1165 { | |
1166 if (isASCIILiteral()) | |
1167 return StringImpl::createFromLiteral(reinterpret_cast<const char*>(m_dat
a8), m_length); | |
1168 if (is8Bit()) | |
1169 return create(m_data8, m_length); | |
1170 return create(m_data16, m_length); | |
1171 } | |
1172 | |
1173 struct StringHash; | |
1174 | |
1175 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> | |
1176 template<typename T> struct DefaultHash; | |
1177 template<> struct DefaultHash<StringImpl*> { | |
1178 typedef StringHash Hash; | |
1179 }; | |
1180 template<> struct DefaultHash<RefPtr<StringImpl> > { | |
1181 typedef StringHash Hash; | |
1182 }; | |
1183 | |
1184 } | |
1185 | |
1186 using WTF::StringImpl; | |
1187 using WTF::equal; | |
1188 using WTF::equalNonNull; | |
1189 using WTF::TextCaseSensitivity; | |
1190 using WTF::TextCaseSensitive; | |
1191 using WTF::TextCaseInsensitive; | |
1192 | |
1193 #endif | |
OLD | NEW |