| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | |
| 3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights
reserved. | |
| 4 * Copyright (C) 2009 Google Inc. All rights reserved. | |
| 5 * | |
| 6 * This library is free software; you can redistribute it and/or | |
| 7 * modify it under the terms of the GNU Library General Public | |
| 8 * License as published by the Free Software Foundation; either | |
| 9 * version 2 of the License, or (at your option) any later version. | |
| 10 * | |
| 11 * This library is distributed in the hope that it will be useful, | |
| 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 14 * Library General Public License for more details. | |
| 15 * | |
| 16 * You should have received a copy of the GNU Library General Public License | |
| 17 * along with this library; see the file COPYING.LIB. If not, write to | |
| 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
| 19 * Boston, MA 02110-1301, USA. | |
| 20 * | |
| 21 */ | |
| 22 | |
| 23 #ifndef StringImpl_h | |
| 24 #define StringImpl_h | |
| 25 | |
| 26 #include <limits.h> | |
| 27 #include <wtf/ASCIICType.h> | |
| 28 #include <wtf/Forward.h> | |
| 29 #include <wtf/StdLibExtras.h> | |
| 30 #include <wtf/StringHasher.h> | |
| 31 #include <wtf/Vector.h> | |
| 32 #include <wtf/unicode/Unicode.h> | |
| 33 | |
| 34 #if USE(CF) | |
| 35 typedef const struct __CFString * CFStringRef; | |
| 36 #endif | |
| 37 | |
| 38 #ifdef __OBJC__ | |
| 39 @class NSString; | |
| 40 #endif | |
| 41 | |
| 42 namespace WTF { | |
| 43 | |
| 44 struct CStringTranslator; | |
| 45 template<typename CharacterType> struct HashAndCharactersTranslator; | |
| 46 struct HashAndUTF8CharactersTranslator; | |
| 47 struct LCharBufferTranslator; | |
| 48 struct CharBufferFromLiteralDataTranslator; | |
| 49 class MemoryObjectInfo; | |
| 50 struct SubstringTranslator; | |
| 51 struct UCharBufferTranslator; | |
| 52 template<typename> class RetainPtr; | |
| 53 | |
| 54 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive }; | |
| 55 | |
| 56 typedef bool (*CharacterMatchFunctionPtr)(UChar); | |
| 57 typedef bool (*IsWhiteSpaceFunctionPtr)(UChar); | |
| 58 | |
| 59 // Define STRING_STATS to turn on run time statistics of string sizes and memory
usage | |
| 60 #undef STRING_STATS | |
| 61 | |
| 62 #ifdef STRING_STATS | |
// Run-time counters describing the strings that have been created.
// Only compiled when STRING_STATS is defined.
struct StringStats {
    // Counts one more 8-bit string. A substring is counted as a string but its
    // length is not added to the 8-bit data total.
    inline void add8BitString(unsigned length, bool isSubString = false)
    {
        m_totalNumberStrings += 1;
        m_number8BitStrings += 1;
        if (isSubString)
            return;
        m_total8BitData += length;
    }

    // Counts one more 16-bit string. A substring is counted as a string but its
    // length is not added to the 16-bit data total.
    inline void add16BitString(unsigned length, bool isSubString = false)
    {
        m_totalNumberStrings += 1;
        m_number16BitStrings += 1;
        if (isSubString)
            return;
        m_total16BitData += length;
    }

    // Counts one more upconverted string of the given length.
    inline void addUpconvertedString(unsigned length)
    {
        m_numberUpconvertedStrings += 1;
        m_totalUpconvertedData += length;
    }

    void removeString(StringImpl*);
    void printStats();

    static const unsigned s_printStringStatsFrequency = 5000;
    static unsigned s_stringRemovesTillPrintStats;

    unsigned m_totalNumberStrings;
    unsigned m_number8BitStrings;
    unsigned m_number16BitStrings;
    unsigned m_numberUpconvertedStrings;
    unsigned long long m_total8BitData;
    unsigned long long m_total16BitData;
    unsigned long long m_totalUpconvertedData;
};
| 100 | |
| 101 #define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitSt
ring(length) | |
| 102 #define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringSta
ts().add8BitString(length, isSubString) | |
| 103 #define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16Bit
String(length) | |
| 104 #define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringSt
ats().add16BitString(length, isSubString) | |
| 105 #define STRING_STATS_ADD_UPCONVERTED_STRING(length) StringImpl::stringStats().ad
dUpconvertedString(length) | |
| 106 #define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeStrin
g(string) | |
| 107 #else | |
| 108 #define STRING_STATS_ADD_8BIT_STRING(length) ((void)0) | |
| 109 #define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) ((void)0) | |
| 110 #define STRING_STATS_ADD_16BIT_STRING(length) ((void)0) | |
| 111 #define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) ((void)0) | |
| 112 #define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0) | |
| 113 #define STRING_STATS_REMOVE_STRING(string) ((void)0) | |
| 114 #endif | |
| 115 | |
| 116 class StringImpl { | |
| 117 WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED; | |
| 118 friend struct WTF::CStringTranslator; | |
| 119 template<typename CharacterType> friend struct WTF::HashAndCharactersTransla
tor; | |
| 120 friend struct WTF::HashAndUTF8CharactersTranslator; | |
| 121 friend struct WTF::CharBufferFromLiteralDataTranslator; | |
| 122 friend struct WTF::LCharBufferTranslator; | |
| 123 friend struct WTF::SubstringTranslator; | |
| 124 friend struct WTF::UCharBufferTranslator; | |
| 125 friend class AtomicStringImpl; | |
| 126 | |
| 127 private: | |
| 128 enum BufferOwnership { | |
| 129 BufferInternal, | |
| 130 BufferOwned, | |
| 131 BufferSubstring, | |
| 132 // NOTE: Adding more ownership types needs to extend m_hashAndFlags as w
e're at capacity | |
| 133 }; | |
| 134 | |
| 135 // Used to construct static strings, which have an special refCount that can
never hit zero. | |
| 136 // This means that the static string will never be destroyed, which is impor
tant because | |
| 137 // static strings will be shared across threads & ref-counted in a non-threa
dsafe manner. | |
| 138 enum ConstructStaticStringTag { ConstructStaticString }; | |
| 139 StringImpl(const UChar* characters, unsigned length, ConstructStaticStringTa
g) | |
| 140 : m_refCount(s_refCountFlagIsStaticString) | |
| 141 , m_length(length) | |
| 142 , m_data16(characters) | |
| 143 , m_buffer(0) | |
| 144 , m_hashAndFlags(s_hashFlagIsIdentifier | BufferOwned) | |
| 145 { | |
| 146 // Ensure that the hash is computed so that AtomicStringHash can call ex
istingHash() | |
| 147 // with impunity. The empty string is special because it is never entere
d into | |
| 148 // AtomicString's HashKey, but still needs to compare correctly. | |
| 149 STRING_STATS_ADD_16BIT_STRING(m_length); | |
| 150 | |
| 151 hash(); | |
| 152 } | |
| 153 | |
| 154 // Used to construct static strings, which have an special refCount that can
never hit zero. | |
| 155 // This means that the static string will never be destroyed, which is impor
tant because | |
| 156 // static strings will be shared across threads & ref-counted in a non-threa
dsafe manner. | |
| 157 StringImpl(const LChar* characters, unsigned length, ConstructStaticStringTa
g) | |
| 158 : m_refCount(s_refCountFlagIsStaticString) | |
| 159 , m_length(length) | |
| 160 , m_data8(characters) | |
| 161 , m_buffer(0) | |
| 162 , m_hashAndFlags(s_hashFlag8BitBuffer | s_hashFlagIsIdentifier | BufferO
wned) | |
| 163 { | |
| 164 // Ensure that the hash is computed so that AtomicStringHash can call ex
istingHash() | |
| 165 // with impunity. The empty string is special because it is never entere
d into | |
| 166 // AtomicString's HashKey, but still needs to compare correctly. | |
| 167 STRING_STATS_ADD_8BIT_STRING(m_length); | |
| 168 | |
| 169 hash(); | |
| 170 } | |
| 171 | |
| 172 // FIXME: there has to be a less hacky way to do this. | |
| 173 enum Force8Bit { Force8BitConstructor }; | |
| 174 // Create a normal 8-bit string with internal storage (BufferInternal) | |
| 175 StringImpl(unsigned length, Force8Bit) | |
| 176 : m_refCount(s_refCountIncrement) | |
| 177 , m_length(length) | |
| 178 , m_data8(reinterpret_cast<const LChar*>(this + 1)) | |
| 179 , m_buffer(0) | |
| 180 , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal) | |
| 181 { | |
| 182 ASSERT(m_data8); | |
| 183 ASSERT(m_length); | |
| 184 | |
| 185 STRING_STATS_ADD_8BIT_STRING(m_length); | |
| 186 } | |
| 187 | |
| 188 // Create a normal 16-bit string with internal storage (BufferInternal) | |
| 189 StringImpl(unsigned length) | |
| 190 : m_refCount(s_refCountIncrement) | |
| 191 , m_length(length) | |
| 192 , m_data16(reinterpret_cast<const UChar*>(this + 1)) | |
| 193 , m_buffer(0) | |
| 194 , m_hashAndFlags(BufferInternal) | |
| 195 { | |
| 196 ASSERT(m_data16); | |
| 197 ASSERT(m_length); | |
| 198 | |
| 199 STRING_STATS_ADD_16BIT_STRING(m_length); | |
| 200 } | |
| 201 | |
| 202 // Create a StringImpl adopting ownership of the provided buffer (BufferOwne
d) | |
| 203 StringImpl(const LChar* characters, unsigned length) | |
| 204 : m_refCount(s_refCountIncrement) | |
| 205 , m_length(length) | |
| 206 , m_data8(characters) | |
| 207 , m_buffer(0) | |
| 208 , m_hashAndFlags(s_hashFlag8BitBuffer | BufferOwned) | |
| 209 { | |
| 210 ASSERT(m_data8); | |
| 211 ASSERT(m_length); | |
| 212 | |
| 213 STRING_STATS_ADD_8BIT_STRING(m_length); | |
| 214 } | |
| 215 | |
| 216 enum ConstructFromLiteralTag { ConstructFromLiteral }; | |
| 217 StringImpl(const char* characters, unsigned length, ConstructFromLiteralTag) | |
| 218 : m_refCount(s_refCountIncrement) | |
| 219 , m_length(length) | |
| 220 , m_data8(reinterpret_cast<const LChar*>(characters)) | |
| 221 , m_buffer(0) | |
| 222 , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal | s_hashFlagHasTe
rminatingNullCharacter) | |
| 223 { | |
| 224 ASSERT(m_data8); | |
| 225 ASSERT(m_length); | |
| 226 ASSERT(!characters[length]); | |
| 227 | |
| 228 STRING_STATS_ADD_8BIT_STRING(0); | |
| 229 } | |
| 230 | |
| 231 // Create a StringImpl adopting ownership of the provided buffer (BufferOwne
d) | |
| 232 StringImpl(const UChar* characters, unsigned length) | |
| 233 : m_refCount(s_refCountIncrement) | |
| 234 , m_length(length) | |
| 235 , m_data16(characters) | |
| 236 , m_buffer(0) | |
| 237 , m_hashAndFlags(BufferOwned) | |
| 238 { | |
| 239 ASSERT(m_data16); | |
| 240 ASSERT(m_length); | |
| 241 | |
| 242 STRING_STATS_ADD_16BIT_STRING(m_length); | |
| 243 } | |
| 244 | |
| 245 // Used to create new strings that are a substring of an existing 8-bit Stri
ngImpl (BufferSubstring) | |
| 246 StringImpl(const LChar* characters, unsigned length, PassRefPtr<StringImpl>
base) | |
| 247 : m_refCount(s_refCountIncrement) | |
| 248 , m_length(length) | |
| 249 , m_data8(characters) | |
| 250 , m_substringBuffer(base.leakRef()) | |
| 251 , m_hashAndFlags(s_hashFlag8BitBuffer | BufferSubstring) | |
| 252 { | |
| 253 ASSERT(is8Bit()); | |
| 254 ASSERT(m_data8); | |
| 255 ASSERT(m_length); | |
| 256 ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); | |
| 257 | |
| 258 STRING_STATS_ADD_8BIT_STRING2(m_length, true); | |
| 259 } | |
| 260 | |
| 261 // Used to create new strings that are a substring of an existing 16-bit Str
ingImpl (BufferSubstring) | |
| 262 StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl>
base) | |
| 263 : m_refCount(s_refCountIncrement) | |
| 264 , m_length(length) | |
| 265 , m_data16(characters) | |
| 266 , m_substringBuffer(base.leakRef()) | |
| 267 , m_hashAndFlags(BufferSubstring) | |
| 268 { | |
| 269 ASSERT(!is8Bit()); | |
| 270 ASSERT(m_data16); | |
| 271 ASSERT(m_length); | |
| 272 ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); | |
| 273 | |
| 274 STRING_STATS_ADD_16BIT_STRING2(m_length, true); | |
| 275 } | |
| 276 | |
| 277 enum CreateEmptyUnique_T { CreateEmptyUnique }; | |
| 278 StringImpl(CreateEmptyUnique_T) | |
| 279 : m_refCount(s_refCountIncrement) | |
| 280 , m_length(0) | |
| 281 , m_data16(reinterpret_cast<const UChar*>(1)) | |
| 282 , m_buffer(0) | |
| 283 { | |
| 284 ASSERT(m_data16); | |
| 285 // Set the hash early, so that all empty unique StringImpls have a hash, | |
| 286 // and don't use the normal hashing algorithm - the unique nature of the
se | |
| 287 // keys means that we don't need them to match any other string (in fact
, | |
| 288 // that's exactly the oposite of what we want!), and teh normal hash wou
ld | |
| 289 // lead to lots of conflicts. | |
| 290 unsigned hash = reinterpret_cast<uintptr_t>(this); | |
| 291 hash <<= s_flagCount; | |
| 292 if (!hash) | |
| 293 hash = 1 << s_flagCount; | |
| 294 m_hashAndFlags = hash | BufferInternal; | |
| 295 | |
| 296 STRING_STATS_ADD_16BIT_STRING(m_length); | |
| 297 } | |
| 298 public: | |
| 299 WTF_EXPORT_STRING_API ~StringImpl(); | |
| 300 | |
| 301 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> create(const UChar*, uns
igned length); | |
| 302 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> create(const LChar*, uns
igned length); | |
| 303 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> create8BitIfPossible(con
st UChar*, unsigned length); | |
| 304 template<size_t inlineCapacity> | |
| 305 static PassRefPtr<StringImpl> create8BitIfPossible(const Vector<UChar, inlin
eCapacity>& vector) | |
| 306 { | |
| 307 return create8BitIfPossible(vector.data(), vector.size()); | |
| 308 } | |
| 309 | |
| 310 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s, unsigned l
ength) { return create(reinterpret_cast<const LChar*>(s), length); } | |
| 311 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> create(const LChar*); | |
| 312 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s) { return c
reate(reinterpret_cast<const LChar*>(s)); } | |
| 313 | |
| 314 static ALWAYS_INLINE PassRefPtr<StringImpl> create8(PassRefPtr<StringImpl> r
ep, unsigned offset, unsigned length) | |
| 315 { | |
| 316 ASSERT(rep); | |
| 317 ASSERT(length <= rep->length()); | |
| 318 | |
| 319 if (!length) | |
| 320 return empty(); | |
| 321 | |
| 322 ASSERT(rep->is8Bit()); | |
| 323 StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep
->m_substringBuffer : rep.get(); | |
| 324 return adoptRef(new StringImpl(rep->m_data8 + offset, length, ownerRep))
; | |
| 325 } | |
| 326 | |
| 327 static ALWAYS_INLINE PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> re
p, unsigned offset, unsigned length) | |
| 328 { | |
| 329 ASSERT(rep); | |
| 330 ASSERT(length <= rep->length()); | |
| 331 | |
| 332 if (!length) | |
| 333 return empty(); | |
| 334 | |
| 335 StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep
->m_substringBuffer : rep.get(); | |
| 336 if (rep->is8Bit()) | |
| 337 return adoptRef(new StringImpl(rep->m_data8 + offset, length, ownerR
ep)); | |
| 338 return adoptRef(new StringImpl(rep->m_data16 + offset, length, ownerRep)
); | |
| 339 } | |
| 340 | |
| 341 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> createFromLiteral(const
char* characters, unsigned length); | |
| 342 template<unsigned charactersCount> | |
| 343 ALWAYS_INLINE static PassRefPtr<StringImpl> createFromLiteral(const char (&c
haracters)[charactersCount]) | |
| 344 { | |
| 345 COMPILE_ASSERT(charactersCount > 1, StringImplFromLiteralNotEmpty); | |
| 346 COMPILE_ASSERT((charactersCount - 1 <= ((unsigned(~0) - sizeof(StringImp
l)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow); | |
| 347 | |
| 348 return createFromLiteral(characters, charactersCount - 1); | |
| 349 } | |
| 350 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> createFromLiteral(const
char* characters); | |
| 351 | |
| 352 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> createUninitialized(unsi
gned length, LChar*& data); | |
| 353 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> createUninitialized(unsi
gned length, UChar*& data); | |
| 354 template <typename T> static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateU
ninitialized(unsigned length, T*& output) | |
| 355 { | |
| 356 if (!length) { | |
| 357 output = 0; | |
| 358 return empty(); | |
| 359 } | |
| 360 | |
| 361 if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)
) / sizeof(T))) { | |
| 362 output = 0; | |
| 363 return 0; | |
| 364 } | |
| 365 StringImpl* resultImpl; | |
| 366 if (!tryFastMalloc(sizeof(T) * length + sizeof(StringImpl)).getValue(res
ultImpl)) { | |
| 367 output = 0; | |
| 368 return 0; | |
| 369 } | |
| 370 output = reinterpret_cast<T*>(resultImpl + 1); | |
| 371 | |
| 372 if (sizeof(T) == sizeof(char)) | |
| 373 return adoptRef(new (NotNull, resultImpl) StringImpl(length, Force8B
itConstructor)); | |
| 374 | |
| 375 return adoptRef(new (NotNull, resultImpl) StringImpl(length)); | |
| 376 } | |
| 377 | |
| 378 static PassRefPtr<StringImpl> createEmptyUnique() | |
| 379 { | |
| 380 return adoptRef(new StringImpl(CreateEmptyUnique)); | |
| 381 } | |
| 382 | |
| 383 // Reallocate the StringImpl. The originalString must be only owned by the P
assRefPtr, | |
| 384 // and the buffer ownership must be BufferInternal. Just like the input poin
ter of realloc(), | |
| 385 // the originalString can't be used after this function. | |
| 386 static PassRefPtr<StringImpl> reallocate(PassRefPtr<StringImpl> originalStri
ng, unsigned length, LChar*& data); | |
| 387 static PassRefPtr<StringImpl> reallocate(PassRefPtr<StringImpl> originalStri
ng, unsigned length, UChar*& data); | |
| 388 | |
| 389 static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAnd
Flags); } | |
| 390 static unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; } | |
| 391 static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8);
} | |
| 392 static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const Strin
gImpl&); | |
| 393 | |
| 394 template<typename CharType, size_t inlineCapacity> | |
| 395 static PassRefPtr<StringImpl> adopt(Vector<CharType, inlineCapacity>& vector
) | |
| 396 { | |
| 397 if (size_t size = vector.size()) { | |
| 398 ASSERT(vector.data()); | |
| 399 RELEASE_ASSERT(size <= std::numeric_limits<unsigned>::max()); | |
| 400 return adoptRef(new StringImpl(vector.releaseBuffer(), size)); | |
| 401 } | |
| 402 return empty(); | |
| 403 } | |
| 404 | |
| 405 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> adopt(StringBuffer<UChar
>&); | |
| 406 WTF_EXPORT_STRING_API static PassRefPtr<StringImpl> adopt(StringBuffer<LChar
>&); | |
| 407 | |
| 408 unsigned length() const { return m_length; } | |
| 409 bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; } | |
| 410 bool hasInternalBuffer() const { return bufferOwnership() == BufferInternal;
} | |
| 411 bool hasOwnedBuffer() const { return bufferOwnership() == BufferOwned; } | |
| 412 StringImpl* baseString() const { return bufferOwnership() == BufferSubstring
? m_substringBuffer : 0; } | |
| 413 | |
| 414 // FIXME: Remove all unnecessary usages of characters() | |
| 415 ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_
data8; } | |
| 416 ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return
m_data16; } | |
| 417 ALWAYS_INLINE const UChar* characters() const | |
| 418 { | |
| 419 if (!is8Bit()) | |
| 420 return m_data16; | |
| 421 | |
| 422 return getData16SlowCase(); | |
| 423 } | |
| 424 | |
| 425 template <typename CharType> | |
| 426 ALWAYS_INLINE const CharType * getCharacters() const; | |
| 427 | |
| 428 size_t cost() | |
| 429 { | |
| 430 // For substrings, return the cost of the base string. | |
| 431 if (bufferOwnership() == BufferSubstring) | |
| 432 return m_substringBuffer->cost(); | |
| 433 | |
| 434 if (m_hashAndFlags & s_hashFlagDidReportCost) | |
| 435 return 0; | |
| 436 | |
| 437 m_hashAndFlags |= s_hashFlagDidReportCost; | |
| 438 return m_length; | |
| 439 } | |
| 440 | |
| 441 WTF_EXPORT_STRING_API size_t sizeInBytes() const; | |
| 442 | |
| 443 bool has16BitShadow() const { return m_hashAndFlags & s_hashFlagHas16BitShad
ow; } | |
| 444 WTF_EXPORT_STRING_API void upconvertCharacters(unsigned, unsigned) const; | |
| 445 bool isIdentifier() const { return m_hashAndFlags & s_hashFlagIsIdentifier;
} | |
| 446 void setIsIdentifier(bool isIdentifier) | |
| 447 { | |
| 448 ASSERT(!isStatic()); | |
| 449 if (isIdentifier) | |
| 450 m_hashAndFlags |= s_hashFlagIsIdentifier; | |
| 451 else | |
| 452 m_hashAndFlags &= ~s_hashFlagIsIdentifier; | |
| 453 } | |
| 454 | |
| 455 bool isEmptyUnique() const | |
| 456 { | |
| 457 return !length() && !isStatic(); | |
| 458 } | |
| 459 | |
| 460 bool hasTerminatingNullCharacter() const { return m_hashAndFlags & s_hashFla
gHasTerminatingNullCharacter; } | |
| 461 | |
| 462 bool isAtomic() const { return m_hashAndFlags & s_hashFlagIsAtomic; } | |
| 463 void setIsAtomic(bool isAtomic) | |
| 464 { | |
| 465 if (isAtomic) | |
| 466 m_hashAndFlags |= s_hashFlagIsAtomic; | |
| 467 else | |
| 468 m_hashAndFlags &= ~s_hashFlagIsAtomic; | |
| 469 } | |
| 470 | |
| 471 bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; } | |
| 472 | |
| 473 #ifdef STRING_STATS | |
| 474 bool isSubString() const { return bufferOwnership() == BufferSubstring; } | |
| 475 #endif | |
| 476 | |
| 477 private: | |
| 478 // The high bits of 'hash' are always empty, but we prefer to store our flag
s | |
| 479 // in the low bits because it makes them slightly more efficient to access. | |
| 480 // So, we shift left and right when setting and getting our hash code. | |
| 481 void setHash(unsigned hash) const | |
| 482 { | |
| 483 ASSERT(!hasHash()); | |
| 484 // Multiple clients assume that StringHasher is the canonical string has
h function. | |
| 485 ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(m_da
ta8, m_length) : StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length))); | |
| 486 ASSERT(!(hash & (s_flagMask << (8 * sizeof(hash) - s_flagCount)))); // V
erify that enough high bits are empty. | |
| 487 | |
| 488 hash <<= s_flagCount; | |
| 489 ASSERT(!(hash & m_hashAndFlags)); // Verify that enough low bits are emp
ty after shift. | |
| 490 ASSERT(hash); // Verify that 0 is a valid sentinel hash value. | |
| 491 | |
| 492 m_hashAndFlags |= hash; // Store hash with flags in low bits. | |
| 493 } | |
| 494 | |
| 495 unsigned rawHash() const | |
| 496 { | |
| 497 return m_hashAndFlags >> s_flagCount; | |
| 498 } | |
| 499 | |
| 500 public: | |
| 501 bool hasHash() const | |
| 502 { | |
| 503 return rawHash() != 0; | |
| 504 } | |
| 505 | |
| 506 unsigned existingHash() const | |
| 507 { | |
| 508 ASSERT(hasHash()); | |
| 509 return rawHash(); | |
| 510 } | |
| 511 | |
| 512 unsigned hash() const | |
| 513 { | |
| 514 if (hasHash()) | |
| 515 return existingHash(); | |
| 516 return hashSlowCase(); | |
| 517 } | |
| 518 | |
| 519 inline bool hasOneRef() const | |
| 520 { | |
| 521 return m_refCount == s_refCountIncrement; | |
| 522 } | |
| 523 | |
| 524 inline void ref() | |
| 525 { | |
| 526 m_refCount += s_refCountIncrement; | |
| 527 } | |
| 528 | |
| 529 inline void deref() | |
| 530 { | |
| 531 if (m_refCount == s_refCountIncrement) { | |
| 532 delete this; | |
| 533 return; | |
| 534 } | |
| 535 | |
| 536 m_refCount -= s_refCountIncrement; | |
| 537 } | |
| 538 | |
| 539 WTF_EXPORT_PRIVATE static StringImpl* empty(); | |
| 540 | |
| 541 // FIXME: Does this really belong in StringImpl? | |
| 542 template <typename T> static void copyChars(T* destination, const T* source,
unsigned numCharacters) | |
| 543 { | |
| 544 if (numCharacters == 1) { | |
| 545 *destination = *source; | |
| 546 return; | |
| 547 } | |
| 548 | |
| 549 if (numCharacters <= s_copyCharsInlineCutOff) { | |
| 550 unsigned i = 0; | |
| 551 #if (CPU(X86) || CPU(X86_64)) | |
| 552 const unsigned charsPerInt = sizeof(uint32_t) / sizeof(T); | |
| 553 | |
| 554 if (numCharacters > charsPerInt) { | |
| 555 unsigned stopCount = numCharacters & ~(charsPerInt - 1); | |
| 556 | |
| 557 const uint32_t* srcCharacters = reinterpret_cast<const uint32_t*
>(source); | |
| 558 uint32_t* destCharacters = reinterpret_cast<uint32_t*>(destinati
on); | |
| 559 for (unsigned j = 0; i < stopCount; i += charsPerInt, ++j) | |
| 560 destCharacters[j] = srcCharacters[j]; | |
| 561 } | |
| 562 #endif | |
| 563 for (; i < numCharacters; ++i) | |
| 564 destination[i] = source[i]; | |
| 565 } else | |
| 566 memcpy(destination, source, numCharacters * sizeof(T)); | |
| 567 } | |
| 568 | |
| 569 ALWAYS_INLINE static void copyChars(UChar* destination, const LChar* source,
unsigned numCharacters) | |
| 570 { | |
| 571 for (unsigned i = 0; i < numCharacters; ++i) | |
| 572 destination[i] = source[i]; | |
| 573 } | |
| 574 | |
| 575 // Some string features, like refcounting and the atomicity flag, are not | |
| 576 // thread-safe. We achieve thread safety by isolation, giving each thread | |
| 577 // its own copy of the string. | |
| 578 PassRefPtr<StringImpl> isolatedCopy() const; | |
| 579 | |
| 580 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> substring(unsigned pos, unsigne
d len = UINT_MAX); | |
| 581 | |
| 582 UChar operator[](unsigned i) const | |
| 583 { | |
| 584 ASSERT_WITH_SECURITY_IMPLICATION(i < m_length); | |
| 585 if (is8Bit()) | |
| 586 return m_data8[i]; | |
| 587 return m_data16[i]; | |
| 588 } | |
| 589 WTF_EXPORT_STRING_API UChar32 characterStartingAt(unsigned); | |
| 590 | |
| 591 WTF_EXPORT_STRING_API bool containsOnlyWhitespace(); | |
| 592 | |
| 593 int toIntStrict(bool* ok = 0, int base = 10); | |
| 594 unsigned toUIntStrict(bool* ok = 0, int base = 10); | |
| 595 int64_t toInt64Strict(bool* ok = 0, int base = 10); | |
| 596 uint64_t toUInt64Strict(bool* ok = 0, int base = 10); | |
| 597 intptr_t toIntPtrStrict(bool* ok = 0, int base = 10); | |
| 598 | |
| 599 WTF_EXPORT_STRING_API int toInt(bool* ok = 0); // ignores trailing garbage | |
| 600 unsigned toUInt(bool* ok = 0); // ignores trailing garbage | |
| 601 int64_t toInt64(bool* ok = 0); // ignores trailing garbage | |
| 602 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage | |
| 603 intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage | |
| 604 | |
| 605 // FIXME: Like the strict functions above, these give false for "ok" when th
ere is trailing garbage. | |
| 606 // Like the non-strict functions above, these return the value when there is
trailing garbage. | |
| 607 // It would be better if these were more consistent with the above functions
instead. | |
| 608 double toDouble(bool* ok = 0); | |
| 609 float toFloat(bool* ok = 0); | |
| 610 | |
| 611 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> lower(); | |
| 612 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> upper(); | |
| 613 | |
| 614 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> fill(UChar); | |
| 615 // FIXME: Do we need fill(char) or can we just do the right thing if UChar i
s ASCII? | |
| 616 PassRefPtr<StringImpl> foldCase(); | |
| 617 | |
| 618 PassRefPtr<StringImpl> stripWhiteSpace(); | |
| 619 PassRefPtr<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr); | |
| 620 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> simplifyWhiteSpace(); | |
| 621 PassRefPtr<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr); | |
| 622 | |
| 623 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); | |
| 624 template <typename CharType> | |
| 625 ALWAYS_INLINE PassRefPtr<StringImpl> removeCharacters(const CharType* charac
ters, CharacterMatchFunctionPtr); | |
| 626 | |
| 627 size_t find(LChar character, unsigned start = 0); | |
| 628 size_t find(char character, unsigned start = 0); | |
| 629 size_t find(UChar character, unsigned start = 0); | |
| 630 WTF_EXPORT_STRING_API size_t find(CharacterMatchFunctionPtr, unsigned index
= 0); | |
| 631 size_t find(const LChar*, unsigned index = 0); | |
| 632 ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) { return find(r
einterpret_cast<const LChar*>(s), index); } | |
| 633 WTF_EXPORT_STRING_API size_t find(StringImpl*); | |
| 634 WTF_EXPORT_STRING_API size_t find(StringImpl*, unsigned index); | |
| 635 size_t findIgnoringCase(const LChar*, unsigned index = 0); | |
| 636 ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { r
eturn findIgnoringCase(reinterpret_cast<const LChar*>(s), index); } | |
| 637 WTF_EXPORT_STRING_API size_t findIgnoringCase(StringImpl*, unsigned index =
0); | |
| 638 | |
| 639 WTF_EXPORT_STRING_API size_t findNextLineStart(unsigned index = UINT_MAX); | |
| 640 | |
| 641 WTF_EXPORT_STRING_API size_t reverseFind(UChar, unsigned index = UINT_MAX); | |
| 642 WTF_EXPORT_STRING_API size_t reverseFind(StringImpl*, unsigned index = UINT_
MAX); | |
| 643 WTF_EXPORT_STRING_API size_t reverseFindIgnoringCase(StringImpl*, unsigned i
ndex = UINT_MAX); | |
| 644 | |
| 645 bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSe
nsitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; } | |
| 646 WTF_EXPORT_STRING_API bool startsWith(UChar) const; | |
| 647 WTF_EXPORT_STRING_API bool startsWith(const char*, unsigned matchLength, boo
l caseSensitive) const; | |
| 648 template<unsigned matchLength> | |
| 649 bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true
) const { return startsWith(prefix, matchLength - 1, caseSensitive); } | |
| 650 | |
| 651 WTF_EXPORT_STRING_API bool endsWith(StringImpl*, bool caseSensitive = true); | |
| 652 WTF_EXPORT_STRING_API bool endsWith(UChar) const; | |
| 653 WTF_EXPORT_STRING_API bool endsWith(const char*, unsigned matchLength, bool
caseSensitive) const; | |
| 654 template<unsigned matchLength> | |
| 655 bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true)
const { return endsWith(prefix, matchLength - 1, caseSensitive); } | |
| 656 | |
| 657 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> replace(UChar, UChar); | |
| 658 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> replace(UChar, StringImpl*); | |
| 659 ALWAYS_INLINE PassRefPtr<StringImpl> replace(UChar pattern, const char* repl
acement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<
const LChar*>(replacement), replacementLength); } | |
| 660 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> replace(UChar, const LChar*, un
signed replacementLength); | |
| 661 PassRefPtr<StringImpl> replace(UChar, const UChar*, unsigned replacementLeng
th); | |
| 662 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> replace(StringImpl*, StringImpl
*); | |
| 663 WTF_EXPORT_STRING_API PassRefPtr<StringImpl> replace(unsigned index, unsigne
d len, StringImpl*); | |
| 664 | |
| 665 WTF_EXPORT_STRING_API WTF::Unicode::Direction defaultWritingDirection(bool*
hasStrongDirectionality = 0); | |
| 666 | |
| 667 #if USE(CF) | |
| 668 RetainPtr<CFStringRef> createCFString(); | |
| 669 #endif | |
| 670 #ifdef __OBJC__ | |
| 671 operator NSString*(); | |
| 672 #endif | |
| 673 | |
| 674 #ifdef STRING_STATS | |
| 675 ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; } | |
| 676 #endif | |
| 677 | |
| 678 private: | |
| 679 | |
| 680 bool isASCIILiteral() const | |
| 681 { | |
| 682 return is8Bit() && hasInternalBuffer() && reinterpret_cast<const void*>(
m_data8) != reinterpret_cast<const void*>(this + 1); | |
| 683 } | |
| 684 | |
| 685 // This number must be at least 2 to avoid sharing empty, null as well as 1
character strings from SmallStrings. | |
| 686 static const unsigned s_copyCharsInlineCutOff = 20; | |
| 687 | |
| 688 BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership
>(m_hashAndFlags & s_hashMaskBufferOwnership); } | |
| 689 template <class UCharPredicate> PassRefPtr<StringImpl> stripMatchedCharacter
s(UCharPredicate); | |
| 690 template <typename CharType, class UCharPredicate> PassRefPtr<StringImpl> si
mplifyMatchedCharactersToSpace(UCharPredicate); | |
| 691 WTF_EXPORT_STRING_API NEVER_INLINE const UChar* getData16SlowCase() const; | |
| 692 WTF_EXPORT_PRIVATE NEVER_INLINE unsigned hashSlowCase() const; | |
| 693 | |
| 694 // The bottom bit in the ref count indicates a static (immortal) string. | |
| 695 static const unsigned s_refCountFlagIsStaticString = 0x1; | |
| 696 static const unsigned s_refCountIncrement = 0x2; // This allows us to ref /
deref without disturbing the static string flag. | |
| 697 | |
| 698 // The bottom 8 bits in the hash are flags. | |
| 699 static const unsigned s_flagCount = 8; | |
| 700 static const unsigned s_flagMask = (1u << s_flagCount) - 1; | |
| 701 COMPILE_ASSERT(s_flagCount == StringHasher::flagCount, StringHasher_reserves
_enough_bits_for_StringImpl_flags); | |
| 702 | |
| 703 static const unsigned s_hashFlagHas16BitShadow = 1u << 7; | |
| 704 static const unsigned s_hashFlag8BitBuffer = 1u << 6; | |
| 705 static const unsigned s_hashFlagHasTerminatingNullCharacter = 1u << 5; | |
| 706 static const unsigned s_hashFlagIsAtomic = 1u << 4; | |
| 707 static const unsigned s_hashFlagDidReportCost = 1u << 3; | |
| 708 static const unsigned s_hashFlagIsIdentifier = 1u << 2; | |
| 709 static const unsigned s_hashMaskBufferOwnership = 1u | (1u << 1); | |
| 710 | |
| 711 #ifdef STRING_STATS | |
| 712 WTF_EXPORTDATA static StringStats m_stringStats; | |
| 713 #endif | |
| 714 | |
| 715 public: | |
| 716 struct StaticASCIILiteral { | |
| 717 // These member variables must match the layout of StringImpl. | |
| 718 unsigned m_refCount; | |
| 719 unsigned m_length; | |
| 720 const LChar* m_data8; | |
| 721 const UChar* m_copyData16; | |
| 722 unsigned m_hashAndFlags; | |
| 723 | |
| 724 static const unsigned s_initialRefCount = s_refCountFlagIsStaticString; | |
| 725 static const unsigned s_initialFlags = s_hashFlag8BitBuffer | s_hashFlag
Has16BitShadow | BufferInternal | s_hashFlagHasTerminatingNullCharacter; | |
| 726 static const unsigned s_hashShift = s_flagCount; | |
| 727 }; | |
| 728 | |
| 729 #ifndef NDEBUG | |
| 730 void assertHashIsCorrect() | |
| 731 { | |
| 732 ASSERT(hasHash()); | |
| 733 ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(charac
ters8(), length())); | |
| 734 } | |
| 735 #endif | |
| 736 | |
| 737 private: | |
| 738 // These member variables must match the layout of StaticASCIILiteral. | |
| 739 unsigned m_refCount; | |
| 740 unsigned m_length; | |
| 741 union { | |
| 742 const LChar* m_data8; | |
| 743 const UChar* m_data16; | |
| 744 }; | |
| 745 union { | |
| 746 void* m_buffer; | |
| 747 StringImpl* m_substringBuffer; | |
| 748 mutable UChar* m_copyData16; | |
| 749 }; | |
| 750 mutable unsigned m_hashAndFlags; | |
| 751 }; | |
| 752 | |
| 753 COMPILE_ASSERT(sizeof(StringImpl) == sizeof(StringImpl::StaticASCIILiteral), Str
ingImpl_should_match_its_StaticASCIILiteral); | |
| 754 | |
| 755 #if !ASSERT_DISABLED | |
| 756 // StringImpls created from StaticASCIILiteral will ASSERT | |
| 757 // in the generic ValueCheck<T>::checkConsistency | |
| 758 // as they are not allocated by fastMalloc. | |
| 759 // We don't currently have any way to detect that case | |
| 760 // so we ignore the consistency check for all StringImpl*. | |
| 761 template<> struct | |
| 762 ValueCheck<StringImpl*> { | |
| 763 static void checkConsistency(const StringImpl*) { } | |
| 764 }; | |
| 765 #endif | |
| 766 | |
| 767 template <> | |
| 768 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { return cha
racters8(); } | |
| 769 | |
| 770 template <> | |
| 771 ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { return cha
racters(); } | |
| 772 | |
| 773 WTF_EXPORT_STRING_API bool equal(const StringImpl*, const StringImpl*); | |
| 774 WTF_EXPORT_STRING_API bool equal(const StringImpl*, const LChar*); | |
| 775 inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterp
ret_cast<const LChar*>(b)); } | |
| 776 WTF_EXPORT_STRING_API bool equal(const StringImpl*, const LChar*, unsigned); | |
| 777 inline bool equal(const StringImpl* a, const char* b, unsigned length) { return
equal(a, reinterpret_cast<const LChar*>(b), length); } | |
| 778 inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); } | |
| 779 inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_ca
st<const LChar*>(a)); } | |
| 780 WTF_EXPORT_STRING_API bool equal(const StringImpl*, const UChar*, unsigned); | |
| 781 WTF_EXPORT_STRING_API bool equalNonNull(const StringImpl* a, const StringImpl* b
); | |
| 782 | |
| 783 // Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe. | |
| 784 #if CPU(X86_64) | |
| 785 ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) | |
| 786 { | |
| 787 unsigned dwordLength = length >> 3; | |
| 788 | |
| 789 if (dwordLength) { | |
| 790 const uint64_t* aDWordCharacters = reinterpret_cast<const uint64_t*>(a); | |
| 791 const uint64_t* bDWordCharacters = reinterpret_cast<const uint64_t*>(b); | |
| 792 | |
| 793 for (unsigned i = 0; i != dwordLength; ++i) { | |
| 794 if (*aDWordCharacters++ != *bDWordCharacters++) | |
| 795 return false; | |
| 796 } | |
| 797 | |
| 798 a = reinterpret_cast<const LChar*>(aDWordCharacters); | |
| 799 b = reinterpret_cast<const LChar*>(bDWordCharacters); | |
| 800 } | |
| 801 | |
| 802 if (length & 4) { | |
| 803 if (*reinterpret_cast<const uint32_t*>(a) != *reinterpret_cast<const uin
t32_t*>(b)) | |
| 804 return false; | |
| 805 | |
| 806 a += 4; | |
| 807 b += 4; | |
| 808 } | |
| 809 | |
| 810 if (length & 2) { | |
| 811 if (*reinterpret_cast<const uint16_t*>(a) != *reinterpret_cast<const uin
t16_t*>(b)) | |
| 812 return false; | |
| 813 | |
| 814 a += 2; | |
| 815 b += 2; | |
| 816 } | |
| 817 | |
| 818 if (length & 1 && (*a != *b)) | |
| 819 return false; | |
| 820 | |
| 821 return true; | |
| 822 } | |
| 823 | |
| 824 ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) | |
| 825 { | |
| 826 unsigned dwordLength = length >> 2; | |
| 827 | |
| 828 if (dwordLength) { | |
| 829 const uint64_t* aDWordCharacters = reinterpret_cast<const uint64_t*>(a); | |
| 830 const uint64_t* bDWordCharacters = reinterpret_cast<const uint64_t*>(b); | |
| 831 | |
| 832 for (unsigned i = 0; i != dwordLength; ++i) { | |
| 833 if (*aDWordCharacters++ != *bDWordCharacters++) | |
| 834 return false; | |
| 835 } | |
| 836 | |
| 837 a = reinterpret_cast<const UChar*>(aDWordCharacters); | |
| 838 b = reinterpret_cast<const UChar*>(bDWordCharacters); | |
| 839 } | |
| 840 | |
| 841 if (length & 2) { | |
| 842 if (*reinterpret_cast<const uint32_t*>(a) != *reinterpret_cast<const uin
t32_t*>(b)) | |
| 843 return false; | |
| 844 | |
| 845 a += 2; | |
| 846 b += 2; | |
| 847 } | |
| 848 | |
| 849 if (length & 1 && (*a != *b)) | |
| 850 return false; | |
| 851 | |
| 852 return true; | |
| 853 } | |
| 854 #elif CPU(X86) | |
| 855 ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) | |
| 856 { | |
| 857 const uint32_t* aCharacters = reinterpret_cast<const uint32_t*>(a); | |
| 858 const uint32_t* bCharacters = reinterpret_cast<const uint32_t*>(b); | |
| 859 | |
| 860 unsigned wordLength = length >> 2; | |
| 861 for (unsigned i = 0; i != wordLength; ++i) { | |
| 862 if (*aCharacters++ != *bCharacters++) | |
| 863 return false; | |
| 864 } | |
| 865 | |
| 866 length &= 3; | |
| 867 | |
| 868 if (length) { | |
| 869 const LChar* aRemainder = reinterpret_cast<const LChar*>(aCharacters); | |
| 870 const LChar* bRemainder = reinterpret_cast<const LChar*>(bCharacters); | |
| 871 | |
| 872 for (unsigned i = 0; i < length; ++i) { | |
| 873 if (aRemainder[i] != bRemainder[i]) | |
| 874 return false; | |
| 875 } | |
| 876 } | |
| 877 | |
| 878 return true; | |
| 879 } | |
| 880 | |
| 881 ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) | |
| 882 { | |
| 883 const uint32_t* aCharacters = reinterpret_cast<const uint32_t*>(a); | |
| 884 const uint32_t* bCharacters = reinterpret_cast<const uint32_t*>(b); | |
| 885 | |
| 886 unsigned wordLength = length >> 1; | |
| 887 for (unsigned i = 0; i != wordLength; ++i) { | |
| 888 if (*aCharacters++ != *bCharacters++) | |
| 889 return false; | |
| 890 } | |
| 891 | |
| 892 if (length & 1 && *reinterpret_cast<const UChar*>(aCharacters) != *reinterpr
et_cast<const UChar*>(bCharacters)) | |
| 893 return false; | |
| 894 | |
| 895 return true; | |
| 896 } | |
| 897 #else | |
| 898 ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) | |
| 899 { | |
| 900 for (unsigned i = 0; i != length; ++i) { | |
| 901 if (a[i] != b[i]) | |
| 902 return false; | |
| 903 } | |
| 904 | |
| 905 return true; | |
| 906 } | |
| 907 | |
| 908 ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) | |
| 909 { | |
| 910 for (unsigned i = 0; i != length; ++i) { | |
| 911 if (a[i] != b[i]) | |
| 912 return false; | |
| 913 } | |
| 914 | |
| 915 return true; | |
| 916 } | |
| 917 #endif | |
| 918 | |
| 919 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) | |
| 920 { | |
| 921 for (unsigned i = 0; i != length; ++i) { | |
| 922 if (a[i] != b[i]) | |
| 923 return false; | |
| 924 } | |
| 925 | |
| 926 return true; | |
| 927 } | |
| 928 | |
| 929 ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) | |
| 930 { | |
| 931 for (unsigned i = 0; i != length; ++i) { | |
| 932 if (a[i] != b[i]) | |
| 933 return false; | |
| 934 } | |
| 935 | |
| 936 return true; | |
| 937 } | |
| 938 | |
| 939 WTF_EXPORT_STRING_API bool equalIgnoringCase(const StringImpl*, const StringImpl
*); | |
| 940 WTF_EXPORT_STRING_API bool equalIgnoringCase(const StringImpl*, const LChar*); | |
| 941 inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { return equa
lIgnoringCase(b, a); } | |
| 942 WTF_EXPORT_STRING_API bool equalIgnoringCase(const LChar*, const LChar*, unsigne
d); | |
| 943 WTF_EXPORT_STRING_API bool equalIgnoringCase(const UChar*, const LChar*, unsigne
d); | |
| 944 inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) {
return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); } | |
| 945 inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) {
return equalIgnoringCase(b, a, length); } | |
| 946 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) {
return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } | |
| 947 inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) {
return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } | |
| 948 inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) | |
| 949 { | |
| 950 ASSERT(length >= 0); | |
| 951 return !Unicode::umemcasecmp(a, b, length); | |
| 952 } | |
| 953 WTF_EXPORT_STRING_API bool equalIgnoringCaseNonNull(const StringImpl*, const Str
ingImpl*); | |
| 954 | |
| 955 WTF_EXPORT_STRING_API bool equalIgnoringNullity(StringImpl*, StringImpl*); | |
| 956 | |
| 957 template<typename CharacterType> | |
| 958 inline size_t find(const CharacterType* characters, unsigned length, CharacterTy
pe matchCharacter, unsigned index = 0) | |
| 959 { | |
| 960 while (index < length) { | |
| 961 if (characters[index] == matchCharacter) | |
| 962 return index; | |
| 963 ++index; | |
| 964 } | |
| 965 return notFound; | |
| 966 } | |
| 967 | |
| 968 ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchC
haracter, unsigned index = 0) | |
| 969 { | |
| 970 return find(characters, length, static_cast<UChar>(matchCharacter), index); | |
| 971 } | |
| 972 | |
| 973 inline size_t find(const LChar* characters, unsigned length, UChar matchCharacte
r, unsigned index = 0) | |
| 974 { | |
| 975 if (matchCharacter & ~0xFF) | |
| 976 return notFound; | |
| 977 return find(characters, length, static_cast<LChar>(matchCharacter), index); | |
| 978 } | |
| 979 | |
| 980 inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunct
ionPtr matchFunction, unsigned index = 0) | |
| 981 { | |
| 982 while (index < length) { | |
| 983 if (matchFunction(characters[index])) | |
| 984 return index; | |
| 985 ++index; | |
| 986 } | |
| 987 return notFound; | |
| 988 } | |
| 989 | |
| 990 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunct
ionPtr matchFunction, unsigned index = 0) | |
| 991 { | |
| 992 while (index < length) { | |
| 993 if (matchFunction(characters[index])) | |
| 994 return index; | |
| 995 ++index; | |
| 996 } | |
| 997 return notFound; | |
| 998 } | |
| 999 | |
| 1000 template<typename CharacterType> | |
| 1001 inline size_t findNextLineStart(const CharacterType* characters, unsigned length
, unsigned index = 0) | |
| 1002 { | |
| 1003 while (index < length) { | |
| 1004 CharacterType c = characters[index++]; | |
| 1005 if ((c != '\n') && (c != '\r')) | |
| 1006 continue; | |
| 1007 | |
| 1008 // There can only be a start of a new line if there are more characters | |
| 1009 // beyond the current character. | |
| 1010 if (index < length) { | |
| 1011 // The 3 common types of line terminators are 1. \r\n (Windows), | |
| 1012 // 2. \r (old MacOS) and 3. \n (Unix'es). | |
| 1013 | |
| 1014 if (c == '\n') | |
| 1015 return index; // Case 3: just \n. | |
| 1016 | |
| 1017 CharacterType c2 = characters[index]; | |
| 1018 if (c2 != '\n') | |
| 1019 return index; // Case 2: just \r. | |
| 1020 | |
| 1021 // Case 1: \r\n. | |
| 1022 // But, there's only a start of a new line if there are more | |
| 1023 // characters beyond the \r\n. | |
| 1024 if (++index < length) | |
| 1025 return index; | |
| 1026 } | |
| 1027 } | |
| 1028 return notFound; | |
| 1029 } | |
| 1030 | |
| 1031 template<typename CharacterType> | |
| 1032 inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigne
d length, unsigned index = UINT_MAX) | |
| 1033 { | |
| 1034 if (!length) | |
| 1035 return notFound; | |
| 1036 if (index >= length) | |
| 1037 index = length - 1; | |
| 1038 CharacterType c = characters[index]; | |
| 1039 while ((c != '\n') && (c != '\r')) { | |
| 1040 if (!index--) | |
| 1041 return notFound; | |
| 1042 c = characters[index]; | |
| 1043 } | |
| 1044 return index; | |
| 1045 } | |
| 1046 | |
| 1047 template<typename CharacterType> | |
| 1048 inline size_t reverseFind(const CharacterType* characters, unsigned length, Char
acterType matchCharacter, unsigned index = UINT_MAX) | |
| 1049 { | |
| 1050 if (!length) | |
| 1051 return notFound; | |
| 1052 if (index >= length) | |
| 1053 index = length - 1; | |
| 1054 while (characters[index] != matchCharacter) { | |
| 1055 if (!index--) | |
| 1056 return notFound; | |
| 1057 } | |
| 1058 return index; | |
| 1059 } | |
| 1060 | |
| 1061 ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar
matchCharacter, unsigned index = UINT_MAX) | |
| 1062 { | |
| 1063 return reverseFind(characters, length, static_cast<UChar>(matchCharacter), i
ndex); | |
| 1064 } | |
| 1065 | |
| 1066 inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchC
haracter, unsigned index = UINT_MAX) | |
| 1067 { | |
| 1068 if (matchCharacter & ~0xFF) | |
| 1069 return notFound; | |
| 1070 return reverseFind(characters, length, static_cast<LChar>(matchCharacter), i
ndex); | |
| 1071 } | |
| 1072 | |
| 1073 inline size_t StringImpl::find(LChar character, unsigned start) | |
| 1074 { | |
| 1075 if (is8Bit()) | |
| 1076 return WTF::find(characters8(), m_length, character, start); | |
| 1077 return WTF::find(characters16(), m_length, character, start); | |
| 1078 } | |
| 1079 | |
| 1080 ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start) | |
| 1081 { | |
| 1082 return find(static_cast<LChar>(character), start); | |
| 1083 } | |
| 1084 | |
| 1085 inline size_t StringImpl::find(UChar character, unsigned start) | |
| 1086 { | |
| 1087 if (is8Bit()) | |
| 1088 return WTF::find(characters8(), m_length, character, start); | |
| 1089 return WTF::find(characters16(), m_length, character, start); | |
| 1090 } | |
| 1091 | |
| 1092 template<size_t inlineCapacity> | |
| 1093 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b) | |
| 1094 { | |
| 1095 if (!b) | |
| 1096 return !a.size(); | |
| 1097 if (a.size() != b->length()) | |
| 1098 return false; | |
| 1099 return !memcmp(a.data(), b->characters(), b->length() * sizeof(UChar)); | |
| 1100 } | |
| 1101 | |
// Three-way comparison of two character buffers. l1 and l2 are the lengths of
// c1 and c2. The first differing character within the common length decides
// the result; if there is none, the shorter buffer orders first.
// Returns -1, 0 or 1.
template<typename CharacterType1, typename CharacterType2>
static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2)
{
    const unsigned commonLength = l1 < l2 ? l1 : l2;

    for (unsigned i = 0; i < commonLength; ++i) {
        if (c1[i] == c2[i])
            continue;
        return (c1[i] > c2[i]) ? 1 : -1;
    }

    if (l1 > l2)
        return 1;
    if (l1 < l2)
        return -1;
    return 0;
}
| 1121 | |
| 1122 static inline int codePointCompare8(const StringImpl* string1, const StringImpl*
string2) | |
| 1123 { | |
| 1124 return codePointCompare(string1->length(), string2->length(), string1->chara
cters8(), string2->characters8()); | |
| 1125 } | |
| 1126 | |
| 1127 static inline int codePointCompare16(const StringImpl* string1, const StringImpl
* string2) | |
| 1128 { | |
| 1129 return codePointCompare(string1->length(), string2->length(), string1->chara
cters16(), string2->characters16()); | |
| 1130 } | |
| 1131 | |
| 1132 static inline int codePointCompare8To16(const StringImpl* string1, const StringI
mpl* string2) | |
| 1133 { | |
| 1134 return codePointCompare(string1->length(), string2->length(), string1->chara
cters8(), string2->characters16()); | |
| 1135 } | |
| 1136 | |
| 1137 static inline int codePointCompare(const StringImpl* string1, const StringImpl*
string2) | |
| 1138 { | |
| 1139 if (!string1) | |
| 1140 return (string2 && string2->length()) ? -1 : 0; | |
| 1141 | |
| 1142 if (!string2) | |
| 1143 return string1->length() ? 1 : 0; | |
| 1144 | |
| 1145 bool string1Is8Bit = string1->is8Bit(); | |
| 1146 bool string2Is8Bit = string2->is8Bit(); | |
| 1147 if (string1Is8Bit) { | |
| 1148 if (string2Is8Bit) | |
| 1149 return codePointCompare8(string1, string2); | |
| 1150 return codePointCompare8To16(string1, string2); | |
| 1151 } | |
| 1152 if (string2Is8Bit) | |
| 1153 return -codePointCompare8To16(string2, string1); | |
| 1154 return codePointCompare16(string1, string2); | |
| 1155 } | |
| 1156 | |
| 1157 static inline bool isSpaceOrNewline(UChar c) | |
| 1158 { | |
| 1159 // Use isASCIISpace() for basic Latin-1. | |
| 1160 // This will include newlines, which aren't included in Unicode DirWS. | |
| 1161 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF:
:Unicode::WhiteSpaceNeutral; | |
| 1162 } | |
| 1163 | |
| 1164 inline PassRefPtr<StringImpl> StringImpl::isolatedCopy() const | |
| 1165 { | |
| 1166 if (isASCIILiteral()) | |
| 1167 return StringImpl::createFromLiteral(reinterpret_cast<const char*>(m_dat
a8), m_length); | |
| 1168 if (is8Bit()) | |
| 1169 return create(m_data8, m_length); | |
| 1170 return create(m_data16, m_length); | |
| 1171 } | |
| 1172 | |
| 1173 struct StringHash; | |
| 1174 | |
| 1175 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> | |
| 1176 template<typename T> struct DefaultHash; | |
| 1177 template<> struct DefaultHash<StringImpl*> { | |
| 1178 typedef StringHash Hash; | |
| 1179 }; | |
| 1180 template<> struct DefaultHash<RefPtr<StringImpl> > { | |
| 1181 typedef StringHash Hash; | |
| 1182 }; | |
| 1183 | |
| 1184 } | |
| 1185 | |
| 1186 using WTF::StringImpl; | |
| 1187 using WTF::equal; | |
| 1188 using WTF::equalNonNull; | |
| 1189 using WTF::TextCaseSensitivity; | |
| 1190 using WTF::TextCaseSensitive; | |
| 1191 using WTF::TextCaseInsensitive; | |
| 1192 | |
| 1193 #endif | |
| OLD | NEW |