| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) |
| 3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights
reserved. | 3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights
reserved. |
| 4 * Copyright (C) 2009 Google Inc. All rights reserved. | 4 * Copyright (C) 2009 Google Inc. All rights reserved. |
| 5 * | 5 * |
| 6 * This library is free software; you can redistribute it and/or | 6 * This library is free software; you can redistribute it and/or |
| 7 * modify it under the terms of the GNU Library General Public | 7 * modify it under the terms of the GNU Library General Public |
| 8 * License as published by the Free Software Foundation; either | 8 * License as published by the Free Software Foundation; either |
| 9 * version 2 of the License, or (at your option) any later version. | 9 * version 2 of the License, or (at your option) any later version. |
| 10 * | 10 * |
| (...skipping 15 matching lines...) Expand all Loading... |
| 26 #include "wtf/ASCIICType.h" | 26 #include "wtf/ASCIICType.h" |
| 27 #include "wtf/Forward.h" | 27 #include "wtf/Forward.h" |
| 28 #include "wtf/HashMap.h" | 28 #include "wtf/HashMap.h" |
| 29 #include "wtf/StringHasher.h" | 29 #include "wtf/StringHasher.h" |
| 30 #include "wtf/Vector.h" | 30 #include "wtf/Vector.h" |
| 31 #include "wtf/WTFExport.h" | 31 #include "wtf/WTFExport.h" |
| 32 #include "wtf/text/Unicode.h" | 32 #include "wtf/text/Unicode.h" |
| 33 #include <limits.h> | 33 #include <limits.h> |
| 34 | 34 |
| 35 #if OS(MACOSX) | 35 #if OS(MACOSX) |
| 36 typedef const struct __CFString * CFStringRef; | 36 typedef const struct __CFString* CFStringRef; |
| 37 #endif | 37 #endif |
| 38 | 38 |
| 39 #ifdef __OBJC__ | 39 #ifdef __OBJC__ |
| 40 @class NSString; | 40 @class NSString; |
| 41 #endif | 41 #endif |
| 42 | 42 |
| 43 namespace WTF { | 43 namespace WTF { |
| 44 | 44 |
| 45 struct AlreadyHashed; | 45 struct AlreadyHashed; |
| 46 struct CStringTranslator; | 46 struct CStringTranslator; |
| 47 template<typename CharacterType> struct HashAndCharactersTranslator; | 47 template <typename CharacterType> |
| 48 struct HashAndCharactersTranslator; |
| 48 struct HashAndUTF8CharactersTranslator; | 49 struct HashAndUTF8CharactersTranslator; |
| 49 struct LCharBufferTranslator; | 50 struct LCharBufferTranslator; |
| 50 struct CharBufferFromLiteralDataTranslator; | 51 struct CharBufferFromLiteralDataTranslator; |
| 51 struct SubstringTranslator; | 52 struct SubstringTranslator; |
| 52 struct UCharBufferTranslator; | 53 struct UCharBufferTranslator; |
| 53 template<typename> class RetainPtr; | 54 template <typename> |
| 55 class RetainPtr; |
| 54 | 56 |
| 55 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive }; | 57 enum TextCaseSensitivity { TextCaseSensitive, |
| 58 TextCaseInsensitive }; |
| 56 | 59 |
| 57 enum StripBehavior { StripExtraWhiteSpace, DoNotStripWhiteSpace }; | 60 enum StripBehavior { StripExtraWhiteSpace, |
| 61 DoNotStripWhiteSpace }; |
| 58 | 62 |
| 59 typedef bool (*CharacterMatchFunctionPtr)(UChar); | 63 typedef bool (*CharacterMatchFunctionPtr)(UChar); |
| 60 typedef bool (*IsWhiteSpaceFunctionPtr)(UChar); | 64 typedef bool (*IsWhiteSpaceFunctionPtr)(UChar); |
| 61 typedef HashMap<unsigned, StringImpl*, AlreadyHashed> StaticStringsTable; | 65 typedef HashMap<unsigned, StringImpl*, AlreadyHashed> StaticStringsTable; |
| 62 | 66 |
| 63 // Define STRING_STATS to turn on run-time statistics of string sizes and memory
usage | 67 // Define STRING_STATS to turn on run-time statistics of string sizes and memory
usage |
| 64 #undef STRING_STATS | 68 #undef STRING_STATS |
| 65 | 69 |
| 66 #ifdef STRING_STATS | 70 #ifdef STRING_STATS |
| 67 struct StringStats { | 71 struct StringStats { |
| 68 inline void add8BitString(unsigned length) | 72 inline void add8BitString(unsigned length) { |
| 69 { | 73 ++m_totalNumberStrings; |
| 70 ++m_totalNumberStrings; | 74 ++m_number8BitStrings; |
| 71 ++m_number8BitStrings; | 75 m_total8BitData += length; |
| 72 m_total8BitData += length; | 76 } |
| 73 } | |
| 74 | 77 |
| 75 inline void add16BitString(unsigned length) | 78 inline void add16BitString(unsigned length) { |
| 76 { | 79 ++m_totalNumberStrings; |
| 77 ++m_totalNumberStrings; | 80 ++m_number16BitStrings; |
| 78 ++m_number16BitStrings; | 81 m_total16BitData += length; |
| 79 m_total16BitData += length; | 82 } |
| 80 } | |
| 81 | 83 |
| 82 void removeString(StringImpl*); | 84 void removeString(StringImpl*); |
| 83 void printStats(); | 85 void printStats(); |
| 84 | 86 |
| 85 static const unsigned s_printStringStatsFrequency = 5000; | 87 static const unsigned s_printStringStatsFrequency = 5000; |
| 86 static unsigned s_stringRemovesTillPrintStats; | 88 static unsigned s_stringRemovesTillPrintStats; |
| 87 | 89 |
| 88 unsigned m_totalNumberStrings; | 90 unsigned m_totalNumberStrings; |
| 89 unsigned m_number8BitStrings; | 91 unsigned m_number8BitStrings; |
| 90 unsigned m_number16BitStrings; | 92 unsigned m_number16BitStrings; |
| 91 unsigned long long m_total8BitData; | 93 unsigned long long m_total8BitData; |
| 92 unsigned long long m_total16BitData; | 94 unsigned long long m_total16BitData; |
| 93 }; | 95 }; |
| 94 | 96 |
| 95 void addStringForStats(StringImpl*); | 97 void addStringForStats(StringImpl*); |
| 96 void removeStringForStats(StringImpl*); | 98 void removeStringForStats(StringImpl*); |
| 97 | 99 |
| 98 #define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitSt
ring(length); addStringForStats(this) | 100 #define STRING_STATS_ADD_8BIT_STRING(length) \ |
| 99 #define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16Bit
String(length); addStringForStats(this) | 101 StringImpl::stringStats().add8BitString(length); \ |
| 100 #define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeStrin
g(string); removeStringForStats(this) | 102 addStringForStats(this) |
| 103 #define STRING_STATS_ADD_16BIT_STRING(length) \ |
| 104 StringImpl::stringStats().add16BitString(length); \ |
| 105 addStringForStats(this) |
| 106 #define STRING_STATS_REMOVE_STRING(string) \ |
| 107 StringImpl::stringStats().removeString(string); \ |
| 108 removeStringForStats(this) |
| 101 #else | 109 #else |
| 102 #define STRING_STATS_ADD_8BIT_STRING(length) ((void)0) | 110 #define STRING_STATS_ADD_8BIT_STRING(length) ((void)0) |
| 103 #define STRING_STATS_ADD_16BIT_STRING(length) ((void)0) | 111 #define STRING_STATS_ADD_16BIT_STRING(length) ((void)0) |
| 104 #define STRING_STATS_REMOVE_STRING(string) ((void)0) | 112 #define STRING_STATS_REMOVE_STRING(string) ((void)0) |
| 105 #endif | 113 #endif |
| 106 | 114 |
| 107 // You can find documentation about this class in this doc: | 115 // You can find documentation about this class in this doc: |
| 108 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl
14/edit?usp=sharing | 116 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl
14/edit?usp=sharing |
| 109 class WTF_EXPORT StringImpl { | 117 class WTF_EXPORT StringImpl { |
| 110 WTF_MAKE_NONCOPYABLE(StringImpl); | 118 WTF_MAKE_NONCOPYABLE(StringImpl); |
| 111 friend struct WTF::CStringTranslator; | 119 friend struct WTF::CStringTranslator; |
| 112 template<typename CharacterType> friend struct WTF::HashAndCharactersTransla
tor; | 120 template <typename CharacterType> |
| 113 friend struct WTF::HashAndUTF8CharactersTranslator; | 121 friend struct WTF::HashAndCharactersTranslator; |
| 114 friend struct WTF::CharBufferFromLiteralDataTranslator; | 122 friend struct WTF::HashAndUTF8CharactersTranslator; |
| 115 friend struct WTF::LCharBufferTranslator; | 123 friend struct WTF::CharBufferFromLiteralDataTranslator; |
| 116 friend struct WTF::SubstringTranslator; | 124 friend struct WTF::LCharBufferTranslator; |
| 117 friend struct WTF::UCharBufferTranslator; | 125 friend struct WTF::SubstringTranslator; |
| 118 | 126 friend struct WTF::UCharBufferTranslator; |
| 119 private: | 127 |
| 120 // StringImpls are allocated out of the WTF buffer partition. | 128 private: |
| 121 void* operator new(size_t); | 129 // StringImpls are allocated out of the WTF buffer partition. |
| 122 void* operator new(size_t, void* ptr) { return ptr; } | 130 void* operator new(size_t); |
| 123 void operator delete(void*); | 131 void* operator new(size_t, void* ptr) { return ptr; } |
| 124 | 132 void operator delete(void*); |
| 125 // Used to construct static strings, which have a special refCount that can | 133 |
| 126 // never hit zero. This means that the static string will never be | 134 // Used to construct static strings, which have a special refCount that can |
| 127 // destroyed, which is important because static strings will be shared | 135 // never hit zero. This means that the static string will never be |
| 128 // across threads & ref-counted in a non-threadsafe manner. | 136 // destroyed, which is important because static strings will be shared |
| 129 enum ConstructEmptyStringTag { ConstructEmptyString }; | 137 // across threads & ref-counted in a non-threadsafe manner. |
| 130 explicit StringImpl(ConstructEmptyStringTag) | 138 enum ConstructEmptyStringTag { ConstructEmptyString }; |
| 131 : m_refCount(1) | 139 explicit StringImpl(ConstructEmptyStringTag) |
| 132 , m_length(0) | 140 : m_refCount(1), m_length(0), m_hash(0), m_isAtomic(false), m_is8Bit(true)
, m_isStatic(true) { |
| 133 , m_hash(0) | 141 // Ensure that the hash is computed so that AtomicStringHash can call |
| 134 , m_isAtomic(false) | 142 // existingHash() with impunity. The empty string is special because it |
| 135 , m_is8Bit(true) | 143 // is never entered into AtomicString's HashKey, but still needs to |
| 136 , m_isStatic(true) | 144 // compare correctly. |
| 137 { | 145 STRING_STATS_ADD_8BIT_STRING(m_length); |
| 138 // Ensure that the hash is computed so that AtomicStringHash can call | 146 hash(); |
| 139 // existingHash() with impunity. The empty string is special because it | 147 } |
| 140 // is never entered into AtomicString's HashKey, but still needs to | 148 |
| 141 // compare correctly. | 149 enum ConstructEmptyString16BitTag { ConstructEmptyString16Bit }; |
| 142 STRING_STATS_ADD_8BIT_STRING(m_length); | 150 explicit StringImpl(ConstructEmptyString16BitTag) |
| 143 hash(); | 151 : m_refCount(1), m_length(0), m_hash(0), m_isAtomic(false), m_is8Bit(false
), m_isStatic(true) { |
| 152 STRING_STATS_ADD_16BIT_STRING(m_length); |
| 153 hash(); |
| 154 } |
| 155 |
| 156 // FIXME: there has to be a less hacky way to do this. |
| 157 enum Force8Bit { Force8BitConstructor }; |
| 158 StringImpl(unsigned length, Force8Bit) |
| 159 : m_refCount(1), m_length(length), m_hash(0), m_isAtomic(false), m_is8Bit(
true), m_isStatic(false) { |
| 160 ASSERT(m_length); |
| 161 STRING_STATS_ADD_8BIT_STRING(m_length); |
| 162 } |
| 163 |
| 164 StringImpl(unsigned length) |
| 165 : m_refCount(1), m_length(length), m_hash(0), m_isAtomic(false), m_is8Bit(
false), m_isStatic(false) { |
| 166 ASSERT(m_length); |
| 167 STRING_STATS_ADD_16BIT_STRING(m_length); |
| 168 } |
| 169 |
| 170 enum StaticStringTag { StaticString }; |
| 171 StringImpl(unsigned length, unsigned hash, StaticStringTag) |
| 172 : m_refCount(1), m_length(length), m_hash(hash), m_isAtomic(false), m_is8B
it(true), m_isStatic(true) { |
| 173 } |
| 174 |
| 175 public: |
| 176 ~StringImpl(); |
| 177 |
| 178 static StringImpl* createStatic(const char* string, unsigned length, unsigned
hash); |
| 179 static void reserveStaticStringsCapacityForSize(unsigned size); |
| 180 static void freezeStaticStrings(); |
| 181 static const StaticStringsTable& allStaticStrings(); |
| 182 static unsigned highestStaticStringLength() { return m_highestStaticStringLeng
th; } |
| 183 |
| 184 static PassRefPtr<StringImpl> create(const UChar*, unsigned length); |
| 185 static PassRefPtr<StringImpl> create(const LChar*, unsigned length); |
| 186 static PassRefPtr<StringImpl> create8BitIfPossible(const UChar*, unsigned leng
th); |
| 187 template <size_t inlineCapacity> |
| 188 static PassRefPtr<StringImpl> create8BitIfPossible(const Vector<UChar, inlineC
apacity>& vector) { |
| 189 return create8BitIfPossible(vector.data(), vector.size()); |
| 190 } |
| 191 |
| 192 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s, unsigned len
gth) { return create(reinterpret_cast<const LChar*>(s), length); } |
| 193 static PassRefPtr<StringImpl> create(const LChar*); |
| 194 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s) { return cre
ate(reinterpret_cast<const LChar*>(s)); } |
| 195 |
| 196 static PassRefPtr<StringImpl> createUninitialized(unsigned length, LChar*& dat
a); |
| 197 static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& dat
a); |
| 198 |
| 199 unsigned length() const { return m_length; } |
| 200 bool is8Bit() const { return m_is8Bit; } |
| 201 |
| 202 ALWAYS_INLINE const LChar* characters8() const { |
| 203 ASSERT(is8Bit()); |
| 204 return reinterpret_cast<const LChar*>(this + 1); |
| 205 } |
| 206 ALWAYS_INLINE const UChar* characters16() const { |
| 207 ASSERT(!is8Bit()); |
| 208 return reinterpret_cast<const UChar*>(this + 1); |
| 209 } |
| 210 |
| 211 template <typename CharType> |
| 212 ALWAYS_INLINE const CharType* getCharacters() const; |
| 213 |
| 214 size_t sizeInBytes() const; |
| 215 |
| 216 bool isAtomic() const { return m_isAtomic; } |
| 217 void setIsAtomic(bool isAtomic) { m_isAtomic = isAtomic; } |
| 218 |
| 219 bool isStatic() const { return m_isStatic; } |
| 220 |
| 221 private: |
| 222 // The high bits of 'hash' are always empty, but we prefer to store our |
| 223 // flags in the low bits because it makes them slightly more efficient to |
| 224 // access. So, we shift left and right when setting and getting our hash |
| 225 // code. |
| 226 void setHash(unsigned hash) const { |
| 227 ASSERT(!hasHash()); |
| 228 // Multiple clients assume that StringHasher is the canonical string |
| 229 // hash function. |
| 230 ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(characte
rs8(), m_length) : StringHasher::computeHashAndMaskTop8Bits(characters16(), m_le
ngth))); |
| 231 m_hash = hash; |
| 232 ASSERT(hash); // Verify that 0 is a valid sentinel hash value. |
| 233 } |
| 234 |
| 235 unsigned rawHash() const { |
| 236 return m_hash; |
| 237 } |
| 238 |
| 239 void destroyIfNotStatic(); |
| 240 |
| 241 public: |
| 242 bool hasHash() const { |
| 243 return rawHash() != 0; |
| 244 } |
| 245 |
| 246 unsigned existingHash() const { |
| 247 ASSERT(hasHash()); |
| 248 return rawHash(); |
| 249 } |
| 250 |
| 251 unsigned hash() const { |
| 252 if (hasHash()) |
| 253 return existingHash(); |
| 254 return hashSlowCase(); |
| 255 } |
| 256 |
| 257 ALWAYS_INLINE bool hasOneRef() const { |
| 258 return m_refCount == 1; |
| 259 } |
| 260 |
| 261 ALWAYS_INLINE void ref() { |
| 262 ++m_refCount; |
| 263 } |
| 264 |
| 265 ALWAYS_INLINE void deref() { |
| 266 if (hasOneRef()) { |
| 267 destroyIfNotStatic(); |
| 268 return; |
| 144 } | 269 } |
| 145 | 270 |
| 146 enum ConstructEmptyString16BitTag { ConstructEmptyString16Bit }; | 271 --m_refCount; |
| 147 explicit StringImpl(ConstructEmptyString16BitTag) | 272 } |
| 148 : m_refCount(1) | 273 |
| 149 , m_length(0) | 274 static StringImpl* empty(); |
| 150 , m_hash(0) | 275 static StringImpl* empty16Bit(); |
| 151 , m_isAtomic(false) | 276 |
| 152 , m_is8Bit(false) | 277 // FIXME: Does this really belong in StringImpl? |
| 153 , m_isStatic(true) | 278 template <typename T> |
| 154 { | 279 static void copyChars(T* destination, const T* source, unsigned numCharacters)
{ |
| 155 STRING_STATS_ADD_16BIT_STRING(m_length); | 280 memcpy(destination, source, numCharacters * sizeof(T)); |
| 156 hash(); | 281 } |
| 157 } | 282 |
| 158 | 283 ALWAYS_INLINE static void copyChars(UChar* destination, const LChar* source, u
nsigned numCharacters) { |
| 159 // FIXME: there has to be a less hacky way to do this. | 284 for (unsigned i = 0; i < numCharacters; ++i) |
| 160 enum Force8Bit { Force8BitConstructor }; | 285 destination[i] = source[i]; |
| 161 StringImpl(unsigned length, Force8Bit) | 286 } |
| 162 : m_refCount(1) | 287 |
| 163 , m_length(length) | 288 // Some string features, like refcounting and the atomicity flag, are not |
| 164 , m_hash(0) | 289 // thread-safe. We achieve thread safety by isolation, giving each thread |
| 165 , m_isAtomic(false) | 290 // its own copy of the string. |
| 166 , m_is8Bit(true) | 291 PassRefPtr<StringImpl> isolatedCopy() const; |
| 167 , m_isStatic(false) | 292 |
| 168 { | 293 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); |
| 169 ASSERT(m_length); | 294 |
| 170 STRING_STATS_ADD_8BIT_STRING(m_length); | 295 UChar operator[](unsigned i) const { |
| 171 } | 296 ASSERT_WITH_SECURITY_IMPLICATION(i < m_length); |
| 172 | 297 if (is8Bit()) |
| 173 StringImpl(unsigned length) | 298 return characters8()[i]; |
| 174 : m_refCount(1) | 299 return characters16()[i]; |
| 175 , m_length(length) | 300 } |
| 176 , m_hash(0) | 301 UChar32 characterStartingAt(unsigned); |
| 177 , m_isAtomic(false) | 302 |
| 178 , m_is8Bit(false) | 303 bool containsOnlyWhitespace(); |
| 179 , m_isStatic(false) | 304 |
| 180 { | 305 int toIntStrict(bool* ok = 0, int base = 10); |
| 181 ASSERT(m_length); | 306 unsigned toUIntStrict(bool* ok = 0, int base = 10); |
| 182 STRING_STATS_ADD_16BIT_STRING(m_length); | 307 int64_t toInt64Strict(bool* ok = 0, int base = 10); |
| 183 } | 308 uint64_t toUInt64Strict(bool* ok = 0, int base = 10); |
| 184 | 309 |
| 185 enum StaticStringTag { StaticString }; | 310 int toInt(bool* ok = 0); // ignores trailing garbage |
| 186 StringImpl(unsigned length, unsigned hash, StaticStringTag) | 311 unsigned toUInt(bool* ok = 0); // ignores trailing garbage |
| 187 : m_refCount(1) | 312 int64_t toInt64(bool* ok = 0); // ignores trailing garbage |
| 188 , m_length(length) | 313 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage |
| 189 , m_hash(hash) | 314 |
| 190 , m_isAtomic(false) | 315 // FIXME: Like the strict functions above, these give false for "ok" when |
| 191 , m_is8Bit(true) | 316 // there is trailing garbage. Like the non-strict functions above, these |
| 192 , m_isStatic(true) | 317 // return the value when there is trailing garbage. It would be better if |
| 193 { | 318 // these were more consistent with the above functions instead. |
| 194 } | 319 double toDouble(bool* ok = 0); |
| 195 | 320 float toFloat(bool* ok = 0); |
| 196 public: | 321 |
| 197 ~StringImpl(); | 322 PassRefPtr<StringImpl> lower(); |
| 198 | 323 PassRefPtr<StringImpl> upper(); |
| 199 static StringImpl* createStatic(const char* string, unsigned length, unsigne
d hash); | 324 PassRefPtr<StringImpl> lower(const AtomicString& localeIdentifier); |
| 200 static void reserveStaticStringsCapacityForSize(unsigned size); | 325 PassRefPtr<StringImpl> upper(const AtomicString& localeIdentifier); |
| 201 static void freezeStaticStrings(); | 326 |
| 202 static const StaticStringsTable& allStaticStrings(); | 327 PassRefPtr<StringImpl> fill(UChar); |
| 203 static unsigned highestStaticStringLength() { return m_highestStaticStringLe
ngth; } | 328 // FIXME: Do we need fill(char) or can we just do the right thing if UChar is
ASCII? |
| 204 | 329 PassRefPtr<StringImpl> foldCase(); |
| 205 static PassRefPtr<StringImpl> create(const UChar*, unsigned length); | 330 |
| 206 static PassRefPtr<StringImpl> create(const LChar*, unsigned length); | 331 PassRefPtr<StringImpl> stripWhiteSpace(); |
| 207 static PassRefPtr<StringImpl> create8BitIfPossible(const UChar*, unsigned le
ngth); | 332 PassRefPtr<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr); |
| 208 template<size_t inlineCapacity> | 333 PassRefPtr<StringImpl> simplifyWhiteSpace(StripBehavior = StripExtraWhiteSpace
); |
| 209 static PassRefPtr<StringImpl> create8BitIfPossible(const Vector<UChar, inlin
eCapacity>& vector) | 334 PassRefPtr<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr, StripBehavi
or = StripExtraWhiteSpace); |
| 210 { | 335 |
| 211 return create8BitIfPossible(vector.data(), vector.size()); | 336 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); |
| 212 } | 337 template <typename CharType> |
| 213 | 338 ALWAYS_INLINE PassRefPtr<StringImpl> removeCharacters(const CharType* characte
rs, CharacterMatchFunctionPtr); |
| 214 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s, unsigned l
ength) { return create(reinterpret_cast<const LChar*>(s), length); } | 339 |
| 215 static PassRefPtr<StringImpl> create(const LChar*); | 340 size_t find(LChar character, unsigned start = 0); |
| 216 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s) { return c
reate(reinterpret_cast<const LChar*>(s)); } | 341 size_t find(char character, unsigned start = 0); |
| 217 | 342 size_t find(UChar character, unsigned start = 0); |
| 218 static PassRefPtr<StringImpl> createUninitialized(unsigned length, LChar*& d
ata); | 343 size_t find(CharacterMatchFunctionPtr, unsigned index = 0); |
| 219 static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& d
ata); | 344 size_t find(const LChar*, unsigned index = 0); |
| 220 | 345 ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) { return find(rei
nterpret_cast<const LChar*>(s), index); } |
| 221 unsigned length() const { return m_length; } | 346 size_t find(StringImpl*); |
| 222 bool is8Bit() const { return m_is8Bit; } | 347 size_t find(StringImpl*, unsigned index); |
| 223 | 348 size_t findIgnoringCase(const LChar*, unsigned index = 0); |
| 224 ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return re
interpret_cast<const LChar*>(this + 1); } | 349 ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { ret
urn findIgnoringCase(reinterpret_cast<const LChar*>(s), index); } |
| 225 ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return
reinterpret_cast<const UChar*>(this + 1); } | 350 size_t findIgnoringCase(StringImpl*, unsigned index = 0); |
| 226 | 351 |
| 227 template <typename CharType> | 352 size_t findNextLineStart(unsigned index = UINT_MAX); |
| 228 ALWAYS_INLINE const CharType * getCharacters() const; | 353 |
| 229 | 354 size_t reverseFind(UChar, unsigned index = UINT_MAX); |
| 230 size_t sizeInBytes() const; | 355 size_t reverseFind(StringImpl*, unsigned index = UINT_MAX); |
| 231 | 356 size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX); |
| 232 bool isAtomic() const { return m_isAtomic; } | 357 |
| 233 void setIsAtomic(bool isAtomic) { m_isAtomic = isAtomic; } | 358 size_t count(LChar) const; |
| 234 | 359 |
| 235 bool isStatic() const { return m_isStatic; } | 360 bool startsWith(StringImpl* str, TextCaseSensitivity caseSensitivity = TextCas
eSensitive) { return ((caseSensitivity == TextCaseSensitive) ? reverseFind(str,
0) : reverseFindIgnoringCase(str, 0)) == 0; } |
| 236 | 361 bool startsWith(UChar) const; |
| 237 private: | 362 bool startsWith(const char*, unsigned matchLength, TextCaseSensitivity) const; |
| 238 // The high bits of 'hash' are always empty, but we prefer to store our | 363 template <unsigned matchLength> |
| 239 // flags in the low bits because it makes them slightly more efficient to | 364 bool startsWith(const char(&prefix)[matchLength], TextCaseSensitivity caseSens
itivity = TextCaseSensitive) const { return startsWith(prefix, matchLength - 1,
caseSensitivity); } |
| 240 // access. So, we shift left and right when setting and getting our hash | 365 |
| 241 // code. | 366 bool endsWith(StringImpl*, TextCaseSensitivity = TextCaseSensitive); |
| 242 void setHash(unsigned hash) const | 367 bool endsWith(UChar) const; |
| 243 { | 368 bool endsWith(const char*, unsigned matchLength, TextCaseSensitivity) const; |
| 244 ASSERT(!hasHash()); | 369 template <unsigned matchLength> |
| 245 // Multiple clients assume that StringHasher is the canonical string | 370 bool endsWith(const char(&prefix)[matchLength], TextCaseSensitivity caseSensit
ivity = TextCaseSensitive) const { return endsWith(prefix, matchLength - 1, case
Sensitivity); } |
| 246 // hash function. | 371 |
| 247 ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(char
acters8(), m_length) : StringHasher::computeHashAndMaskTop8Bits(characters16(),
m_length))); | 372 PassRefPtr<StringImpl> replace(UChar, UChar); |
| 248 m_hash = hash; | 373 PassRefPtr<StringImpl> replace(UChar, StringImpl*); |
| 249 ASSERT(hash); // Verify that 0 is a valid sentinel hash value. | 374 ALWAYS_INLINE PassRefPtr<StringImpl> replace(UChar pattern, const char* replac
ement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<co
nst LChar*>(replacement), replacementLength); } |
| 250 } | 375 PassRefPtr<StringImpl> replace(UChar, const LChar*, unsigned replacementLength
); |
| 251 | 376 PassRefPtr<StringImpl> replace(UChar, const UChar*, unsigned replacementLength
); |
| 252 unsigned rawHash() const | 377 PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*); |
| 253 { | 378 PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*); |
| 254 return m_hash; | 379 PassRefPtr<StringImpl> upconvertedString(); |
| 255 } | |
| 256 | |
| 257 void destroyIfNotStatic(); | |
| 258 | |
| 259 public: | |
| 260 bool hasHash() const | |
| 261 { | |
| 262 return rawHash() != 0; | |
| 263 } | |
| 264 | |
| 265 unsigned existingHash() const | |
| 266 { | |
| 267 ASSERT(hasHash()); | |
| 268 return rawHash(); | |
| 269 } | |
| 270 | |
| 271 unsigned hash() const | |
| 272 { | |
| 273 if (hasHash()) | |
| 274 return existingHash(); | |
| 275 return hashSlowCase(); | |
| 276 } | |
| 277 | |
| 278 ALWAYS_INLINE bool hasOneRef() const | |
| 279 { | |
| 280 return m_refCount == 1; | |
| 281 } | |
| 282 | |
| 283 ALWAYS_INLINE void ref() | |
| 284 { | |
| 285 ++m_refCount; | |
| 286 } | |
| 287 | |
| 288 ALWAYS_INLINE void deref() | |
| 289 { | |
| 290 if (hasOneRef()) { | |
| 291 destroyIfNotStatic(); | |
| 292 return; | |
| 293 } | |
| 294 | |
| 295 --m_refCount; | |
| 296 } | |
| 297 | |
| 298 static StringImpl* empty(); | |
| 299 static StringImpl* empty16Bit(); | |
| 300 | |
| 301 // FIXME: Does this really belong in StringImpl? | |
| 302 template <typename T> static void copyChars(T* destination, const T* source,
unsigned numCharacters) | |
| 303 { | |
| 304 memcpy(destination, source, numCharacters * sizeof(T)); | |
| 305 } | |
| 306 | |
| 307 ALWAYS_INLINE static void copyChars(UChar* destination, const LChar* source,
unsigned numCharacters) | |
| 308 { | |
| 309 for (unsigned i = 0; i < numCharacters; ++i) | |
| 310 destination[i] = source[i]; | |
| 311 } | |
| 312 | |
| 313 // Some string features, like refcounting and the atomicity flag, are not | |
| 314 // thread-safe. We achieve thread safety by isolation, giving each thread | |
| 315 // its own copy of the string. | |
| 316 PassRefPtr<StringImpl> isolatedCopy() const; | |
| 317 | |
| 318 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); | |
| 319 | |
| 320 UChar operator[](unsigned i) const | |
| 321 { | |
| 322 ASSERT_WITH_SECURITY_IMPLICATION(i < m_length); | |
| 323 if (is8Bit()) | |
| 324 return characters8()[i]; | |
| 325 return characters16()[i]; | |
| 326 } | |
| 327 UChar32 characterStartingAt(unsigned); | |
| 328 | |
| 329 bool containsOnlyWhitespace(); | |
| 330 | |
| 331 int toIntStrict(bool* ok = 0, int base = 10); | |
| 332 unsigned toUIntStrict(bool* ok = 0, int base = 10); | |
| 333 int64_t toInt64Strict(bool* ok = 0, int base = 10); | |
| 334 uint64_t toUInt64Strict(bool* ok = 0, int base = 10); | |
| 335 | |
| 336 int toInt(bool* ok = 0); // ignores trailing garbage | |
| 337 unsigned toUInt(bool* ok = 0); // ignores trailing garbage | |
| 338 int64_t toInt64(bool* ok = 0); // ignores trailing garbage | |
| 339 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage | |
| 340 | |
| 341 // FIXME: Like the strict functions above, these give false for "ok" when | |
| 342 // there is trailing garbage. Like the non-strict functions above, these | |
| 343 // return the value when there is trailing garbage. It would be better if | |
| 344 // these were more consistent with the above functions instead. | |
| 345 double toDouble(bool* ok = 0); | |
| 346 float toFloat(bool* ok = 0); | |
| 347 | |
| 348 PassRefPtr<StringImpl> lower(); | |
| 349 PassRefPtr<StringImpl> upper(); | |
| 350 PassRefPtr<StringImpl> lower(const AtomicString& localeIdentifier); | |
| 351 PassRefPtr<StringImpl> upper(const AtomicString& localeIdentifier); | |
| 352 | |
| 353 PassRefPtr<StringImpl> fill(UChar); | |
| 354 // FIXME: Do we need fill(char) or can we just do the right thing if UChar i
s ASCII? | |
| 355 PassRefPtr<StringImpl> foldCase(); | |
| 356 | |
| 357 PassRefPtr<StringImpl> stripWhiteSpace(); | |
| 358 PassRefPtr<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr); | |
| 359 PassRefPtr<StringImpl> simplifyWhiteSpace(StripBehavior = StripExtraWhiteSpa
ce); | |
| 360 PassRefPtr<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr, StripBeha
vior = StripExtraWhiteSpace); | |
| 361 | |
| 362 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); | |
| 363 template <typename CharType> | |
| 364 ALWAYS_INLINE PassRefPtr<StringImpl> removeCharacters(const CharType* charac
ters, CharacterMatchFunctionPtr); | |
| 365 | |
| 366 size_t find(LChar character, unsigned start = 0); | |
| 367 size_t find(char character, unsigned start = 0); | |
| 368 size_t find(UChar character, unsigned start = 0); | |
| 369 size_t find(CharacterMatchFunctionPtr, unsigned index = 0); | |
| 370 size_t find(const LChar*, unsigned index = 0); | |
| 371 ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) { return find(r
einterpret_cast<const LChar*>(s), index); } | |
| 372 size_t find(StringImpl*); | |
| 373 size_t find(StringImpl*, unsigned index); | |
| 374 size_t findIgnoringCase(const LChar*, unsigned index = 0); | |
| 375 ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { r
eturn findIgnoringCase(reinterpret_cast<const LChar*>(s), index); } | |
| 376 size_t findIgnoringCase(StringImpl*, unsigned index = 0); | |
| 377 | |
| 378 size_t findNextLineStart(unsigned index = UINT_MAX); | |
| 379 | |
| 380 size_t reverseFind(UChar, unsigned index = UINT_MAX); | |
| 381 size_t reverseFind(StringImpl*, unsigned index = UINT_MAX); | |
| 382 size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX); | |
| 383 | |
| 384 size_t count(LChar) const; | |
| 385 | |
| 386 bool startsWith(StringImpl* str, TextCaseSensitivity caseSensitivity = TextC
aseSensitive) { return ((caseSensitivity == TextCaseSensitive) ? reverseFind(str
, 0) : reverseFindIgnoringCase(str, 0)) == 0; } | |
| 387 bool startsWith(UChar) const; | |
| 388 bool startsWith(const char*, unsigned matchLength, TextCaseSensitivity) cons
t; | |
| 389 template<unsigned matchLength> | |
| 390 bool startsWith(const char (&prefix)[matchLength], TextCaseSensitivity caseS
ensitivity = TextCaseSensitive) const { return startsWith(prefix, matchLength -
1, caseSensitivity); } | |
| 391 | |
| 392 bool endsWith(StringImpl*, TextCaseSensitivity = TextCaseSensitive); | |
| 393 bool endsWith(UChar) const; | |
| 394 bool endsWith(const char*, unsigned matchLength, TextCaseSensitivity) const; | |
| 395 template<unsigned matchLength> | |
| 396 bool endsWith(const char (&prefix)[matchLength], TextCaseSensitivity caseSen
sitivity = TextCaseSensitive) const { return endsWith(prefix, matchLength - 1, c
aseSensitivity); } | |
| 397 | |
| 398 PassRefPtr<StringImpl> replace(UChar, UChar); | |
| 399 PassRefPtr<StringImpl> replace(UChar, StringImpl*); | |
| 400 ALWAYS_INLINE PassRefPtr<StringImpl> replace(UChar pattern, const char* repl
acement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<
const LChar*>(replacement), replacementLength); } | |
| 401 PassRefPtr<StringImpl> replace(UChar, const LChar*, unsigned replacementLeng
th); | |
| 402 PassRefPtr<StringImpl> replace(UChar, const UChar*, unsigned replacementLeng
th); | |
| 403 PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*); | |
| 404 PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*); | |
| 405 PassRefPtr<StringImpl> upconvertedString(); | |
| 406 | 380 |
| 407 #if OS(MACOSX) | 381 #if OS(MACOSX) |
| 408 RetainPtr<CFStringRef> createCFString(); | 382 RetainPtr<CFStringRef> createCFString(); |
| 409 #endif | 383 #endif |
| 410 #ifdef __OBJC__ | 384 #ifdef __OBJC__ |
| 411 operator NSString*(); | 385 operator NSString*(); |
| 412 #endif | 386 #endif |
| 413 | 387 |
| 414 #ifdef STRING_STATS | 388 #ifdef STRING_STATS |
| 415 ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; } | 389 ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; } |
| 416 #endif | 390 #endif |
| 417 static const UChar latin1CaseFoldTable[256]; | 391 static const UChar latin1CaseFoldTable[256]; |
| 418 | 392 |
| 419 private: | 393 private: |
| 420 template<typename CharType> static size_t allocationSize(unsigned length) | 394 template <typename CharType> |
| 421 { | 395 static size_t allocationSize(unsigned length) { |
| 422 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof
(StringImpl)) / sizeof(CharType))); | 396 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str
ingImpl)) / sizeof(CharType))); |
| 423 return sizeof(StringImpl) + length * sizeof(CharType); | 397 return sizeof(StringImpl) + length * sizeof(CharType); |
| 424 } | 398 } |
| 425 | 399 |
| 426 template <class UCharPredicate> PassRefPtr<StringImpl> stripMatchedCharacter
s(UCharPredicate); | 400 template <class UCharPredicate> |
| 427 template <typename CharType, class UCharPredicate> PassRefPtr<StringImpl> si
mplifyMatchedCharactersToSpace(UCharPredicate, StripBehavior); | 401 PassRefPtr<StringImpl> stripMatchedCharacters(UCharPredicate); |
| 428 NEVER_INLINE unsigned hashSlowCase() const; | 402 template <typename CharType, class UCharPredicate> |
| 403 PassRefPtr<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate, StripB
ehavior); |
| 404 NEVER_INLINE unsigned hashSlowCase() const; |
| 429 | 405 |
| 430 #ifdef STRING_STATS | 406 #ifdef STRING_STATS |
| 431 static StringStats m_stringStats; | 407 static StringStats m_stringStats; |
| 432 #endif | 408 #endif |
| 433 | 409 |
| 434 static unsigned m_highestStaticStringLength; | 410 static unsigned m_highestStaticStringLength; |
| 435 | 411 |
| 436 #if ENABLE(ASSERT) | 412 #if ENABLE(ASSERT) |
| 437 void assertHashIsCorrect() | 413 void assertHashIsCorrect() { |
| 438 { | 414 ASSERT(hasHash()); |
| 439 ASSERT(hasHash()); | 415 ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters
8(), length())); |
| 440 ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(charac
ters8(), length())); | 416 } |
| 441 } | |
| 442 #endif | 417 #endif |
| 443 | 418 |
| 444 private: | 419 private: |
| 445 unsigned m_refCount; | 420 unsigned m_refCount; |
| 446 const unsigned m_length; | 421 const unsigned m_length; |
| 447 mutable unsigned m_hash : 24; | 422 mutable unsigned m_hash : 24; |
| 448 unsigned m_isAtomic : 1; | 423 unsigned m_isAtomic : 1; |
| 449 const unsigned m_is8Bit : 1; | 424 const unsigned m_is8Bit : 1; |
| 450 const unsigned m_isStatic : 1; | 425 const unsigned m_isStatic : 1; |
| 451 }; | 426 }; |
| 452 | 427 |
| 453 template <> | 428 template <> |
| 454 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { return cha
racters8(); } | 429 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { |
| 430 return characters8(); |
| 431 } |
| 455 | 432 |
| 456 template <> | 433 template <> |
| 457 ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { return cha
racters16(); } | 434 ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { |
| 435 return characters16(); |
| 436 } |
| 458 | 437 |
| 459 WTF_EXPORT bool equal(const StringImpl*, const StringImpl*); | 438 WTF_EXPORT bool equal(const StringImpl*, const StringImpl*); |
| 460 WTF_EXPORT bool equal(const StringImpl*, const LChar*); | 439 WTF_EXPORT bool equal(const StringImpl*, const LChar*); |
| 461 inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterp
ret_cast<const LChar*>(b)); } | 440 inline bool equal(const StringImpl* a, const char* b) { |
| 441 return equal(a, reinterpret_cast<const LChar*>(b)); |
| 442 } |
| 462 WTF_EXPORT bool equal(const StringImpl*, const LChar*, unsigned); | 443 WTF_EXPORT bool equal(const StringImpl*, const LChar*, unsigned); |
| 463 WTF_EXPORT bool equal(const StringImpl*, const UChar*, unsigned); | 444 WTF_EXPORT bool equal(const StringImpl*, const UChar*, unsigned); |
| 464 inline bool equal(const StringImpl* a, const char* b, unsigned length) { return
equal(a, reinterpret_cast<const LChar*>(b), length); } | 445 inline bool equal(const StringImpl* a, const char* b, unsigned length) { |
| 465 inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); } | 446 return equal(a, reinterpret_cast<const LChar*>(b), length); |
| 466 inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_ca
st<const LChar*>(a)); } | 447 } |
| 448 inline bool equal(const LChar* a, StringImpl* b) { |
| 449 return equal(b, a); |
| 450 } |
| 451 inline bool equal(const char* a, StringImpl* b) { |
| 452 return equal(b, reinterpret_cast<const LChar*>(a)); |
| 453 } |
| 467 WTF_EXPORT bool equalNonNull(const StringImpl* a, const StringImpl* b); | 454 WTF_EXPORT bool equalNonNull(const StringImpl* a, const StringImpl* b); |
| 468 | 455 |
| 469 template<typename CharType> | 456 template <typename CharType> |
| 470 ALWAYS_INLINE bool equal(const CharType* a, const CharType* b, unsigned length)
{ return !memcmp(a, b, length * sizeof(CharType)); } | 457 ALWAYS_INLINE bool equal(const CharType* a, const CharType* b, unsigned length)
{ |
| 471 | 458 return !memcmp(a, b, length * sizeof(CharType)); |
| 472 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) | 459 } |
| 473 { | 460 |
| 474 for (unsigned i = 0; i < length; ++i) { | 461 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) { |
| 475 if (a[i] != b[i]) | 462 for (unsigned i = 0; i < length; ++i) { |
| 476 return false; | 463 if (a[i] != b[i]) |
| 477 } | 464 return false; |
| 478 return true; | 465 } |
| 479 } | 466 return true; |
| 480 | 467 } |
| 481 ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { retu
rn equal(b, a, length); } | 468 |
| 469 ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { |
| 470 return equal(b, a, length); |
| 471 } |
| 482 | 472 |
| 483 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const StringImpl*); | 473 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const StringImpl*); |
| 484 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const LChar*); | 474 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const LChar*); |
| 485 inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { return equa
lIgnoringCase(b, a); } | 475 inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { |
| 476 return equalIgnoringCase(b, a); |
| 477 } |
| 486 WTF_EXPORT bool equalIgnoringCase(const LChar*, const LChar*, unsigned); | 478 WTF_EXPORT bool equalIgnoringCase(const LChar*, const LChar*, unsigned); |
| 487 WTF_EXPORT bool equalIgnoringCase(const UChar*, const LChar*, unsigned); | 479 WTF_EXPORT bool equalIgnoringCase(const UChar*, const LChar*, unsigned); |
| 488 inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) {
return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); } | 480 inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) { |
| 489 inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) {
return equalIgnoringCase(b, a, length); } | 481 return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); |
| 490 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) {
return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } | 482 } |
| 491 inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) {
return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } | 483 inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) { |
| 492 inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) | 484 return equalIgnoringCase(b, a, length); |
| 493 { | 485 } |
| 494 ASSERT(length >= 0); | 486 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { |
| 495 return !Unicode::umemcasecmp(a, b, length); | 487 return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); |
| 488 } |
| 489 inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) { |
| 490 return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); |
| 491 } |
| 492 inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) { |
| 493 ASSERT(length >= 0); |
| 494 return !Unicode::umemcasecmp(a, b, length); |
| 496 } | 495 } |
| 497 WTF_EXPORT bool equalIgnoringCaseNonNull(const StringImpl*, const StringImpl*); | 496 WTF_EXPORT bool equalIgnoringCaseNonNull(const StringImpl*, const StringImpl*); |
| 498 | 497 |
| 499 WTF_EXPORT bool equalIgnoringNullity(StringImpl*, StringImpl*); | 498 WTF_EXPORT bool equalIgnoringNullity(StringImpl*, StringImpl*); |
| 500 | 499 |
| 501 template<typename CharacterTypeA, typename CharacterTypeB> | 500 template <typename CharacterTypeA, typename CharacterTypeB> |
| 502 inline bool equalIgnoringASCIICase(const CharacterTypeA* a, const CharacterTypeB
* b, unsigned length) | 501 inline bool equalIgnoringASCIICase(const CharacterTypeA* a, const CharacterTypeB
* b, unsigned length) { |
| 503 { | 502 for (unsigned i = 0; i < length; ++i) { |
| 504 for (unsigned i = 0; i < length; ++i) { | 503 if (toASCIILower(a[i]) != toASCIILower(b[i])) |
| 505 if (toASCIILower(a[i]) != toASCIILower(b[i])) | 504 return false; |
| 506 return false; | 505 } |
| 506 return true; |
| 507 } |
| 508 |
| 509 template <typename CharacterTypeA, typename CharacterTypeB> |
| 510 bool startsWithIgnoringASCIICase(const CharacterTypeA& reference, const Characte
rTypeB& prefix) { |
| 511 unsigned prefixLength = prefix.length(); |
| 512 if (prefixLength > reference.length()) |
| 513 return false; |
| 514 |
| 515 if (reference.is8Bit()) { |
| 516 if (prefix.is8Bit()) |
| 517 return equalIgnoringASCIICase(reference.characters8(), prefix.characters8(
), prefixLength); |
| 518 return equalIgnoringASCIICase(reference.characters8(), prefix.characters16()
, prefixLength); |
| 519 } |
| 520 if (prefix.is8Bit()) |
| 521 return equalIgnoringASCIICase(reference.characters16(), prefix.characters8()
, prefixLength); |
| 522 return equalIgnoringASCIICase(reference.characters16(), prefix.characters16(),
prefixLength); |
| 523 } |
| 524 |
| 525 template <typename CharacterType> |
| 526 inline size_t find(const CharacterType* characters, unsigned length, CharacterTy
pe matchCharacter, unsigned index = 0) { |
| 527 while (index < length) { |
| 528 if (characters[index] == matchCharacter) |
| 529 return index; |
| 530 ++index; |
| 531 } |
| 532 return kNotFound; |
| 533 } |
| 534 |
| 535 ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchC
haracter, unsigned index = 0) { |
| 536 return find(characters, length, static_cast<UChar>(matchCharacter), index); |
| 537 } |
| 538 |
| 539 inline size_t find(const LChar* characters, unsigned length, UChar matchCharacte
r, unsigned index = 0) { |
| 540 if (matchCharacter & ~0xFF) |
| 541 return kNotFound; |
| 542 return find(characters, length, static_cast<LChar>(matchCharacter), index); |
| 543 } |
| 544 |
| 545 inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunct
ionPtr matchFunction, unsigned index = 0) { |
| 546 while (index < length) { |
| 547 if (matchFunction(characters[index])) |
| 548 return index; |
| 549 ++index; |
| 550 } |
| 551 return kNotFound; |
| 552 } |
| 553 |
| 554 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunct
ionPtr matchFunction, unsigned index = 0) { |
| 555 while (index < length) { |
| 556 if (matchFunction(characters[index])) |
| 557 return index; |
| 558 ++index; |
| 559 } |
| 560 return kNotFound; |
| 561 } |
| 562 |
| 563 template <typename CharacterType> |
| 564 inline size_t findNextLineStart(const CharacterType* characters, unsigned length
, unsigned index = 0) { |
| 565 while (index < length) { |
| 566 CharacterType c = characters[index++]; |
| 567 if ((c != '\n') && (c != '\r')) |
| 568 continue; |
| 569 |
| 570 // There can only be a start of a new line if there are more characters |
| 571 // beyond the current character. |
| 572 if (index < length) { |
| 573 // The 3 common types of line terminators are 1. \r\n (Windows), |
| 574 // 2. \r (old MacOS) and 3. \n (Unix'es). |
| 575 |
| 576 if (c == '\n') |
| 577 return index; // Case 3: just \n. |
| 578 |
| 579 CharacterType c2 = characters[index]; |
| 580 if (c2 != '\n') |
| 581 return index; // Case 2: just \r. |
| 582 |
| 583 // Case 1: \r\n. |
| 584 // But, there's only a start of a new line if there are more |
| 585 // characters beyond the \r\n. |
| 586 if (++index < length) |
| 587 return index; |
| 507 } | 588 } |
| 508 return true; | 589 } |
| 509 } | 590 return kNotFound; |
| 510 | 591 } |
| 511 template<typename CharacterTypeA, typename CharacterTypeB> | 592 |
| 512 bool startsWithIgnoringASCIICase(const CharacterTypeA& reference, const Characte
rTypeB& prefix) | 593 template <typename CharacterType> |
| 513 { | 594 inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigne
d length, unsigned index = UINT_MAX) { |
| 514 unsigned prefixLength = prefix.length(); | 595 if (!length) |
| 515 if (prefixLength > reference.length()) | |
| 516 return false; | |
| 517 | |
| 518 if (reference.is8Bit()) { | |
| 519 if (prefix.is8Bit()) | |
| 520 return equalIgnoringASCIICase(reference.characters8(), prefix.charac
ters8(), prefixLength); | |
| 521 return equalIgnoringASCIICase(reference.characters8(), prefix.characters
16(), prefixLength); | |
| 522 } | |
| 523 if (prefix.is8Bit()) | |
| 524 return equalIgnoringASCIICase(reference.characters16(), prefix.character
s8(), prefixLength); | |
| 525 return equalIgnoringASCIICase(reference.characters16(), prefix.characters16(
), prefixLength); | |
| 526 } | |
| 527 | |
| 528 template<typename CharacterType> | |
| 529 inline size_t find(const CharacterType* characters, unsigned length, CharacterTy
pe matchCharacter, unsigned index = 0) | |
| 530 { | |
| 531 while (index < length) { | |
| 532 if (characters[index] == matchCharacter) | |
| 533 return index; | |
| 534 ++index; | |
| 535 } | |
| 536 return kNotFound; | 596 return kNotFound; |
| 537 } | 597 if (index >= length) |
| 538 | 598 index = length - 1; |
| 539 ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchC
haracter, unsigned index = 0) | 599 CharacterType c = characters[index]; |
| 540 { | 600 while ((c != '\n') && (c != '\r')) { |
| 541 return find(characters, length, static_cast<UChar>(matchCharacter), index); | 601 if (!index--) |
| 542 } | 602 return kNotFound; |
| 543 | 603 c = characters[index]; |
| 544 inline size_t find(const LChar* characters, unsigned length, UChar matchCharacte
r, unsigned index = 0) | 604 } |
| 545 { | 605 return index; |
| 546 if (matchCharacter & ~0xFF) | 606 } |
| 547 return kNotFound; | 607 |
| 548 return find(characters, length, static_cast<LChar>(matchCharacter), index); | 608 template <typename CharacterType> |
| 549 } | 609 inline size_t reverseFind(const CharacterType* characters, unsigned length, Char
acterType matchCharacter, unsigned index = UINT_MAX) { |
| 550 | 610 if (!length) |
| 551 inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunct
ionPtr matchFunction, unsigned index = 0) | |
| 552 { | |
| 553 while (index < length) { | |
| 554 if (matchFunction(characters[index])) | |
| 555 return index; | |
| 556 ++index; | |
| 557 } | |
| 558 return kNotFound; | 611 return kNotFound; |
| 559 } | 612 if (index >= length) |
| 560 | 613 index = length - 1; |
| 561 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunct
ionPtr matchFunction, unsigned index = 0) | 614 while (characters[index] != matchCharacter) { |
| 562 { | 615 if (!index--) |
| 563 while (index < length) { | 616 return kNotFound; |
| 564 if (matchFunction(characters[index])) | 617 } |
| 565 return index; | 618 return index; |
| 566 ++index; | 619 } |
| 567 } | 620 |
| 621 ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar
matchCharacter, unsigned index = UINT_MAX) { |
| 622 return reverseFind(characters, length, static_cast<UChar>(matchCharacter), ind
ex); |
| 623 } |
| 624 |
| 625 inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchC
haracter, unsigned index = UINT_MAX) { |
| 626 if (matchCharacter & ~0xFF) |
| 568 return kNotFound; | 627 return kNotFound; |
| 569 } | 628 return reverseFind(characters, length, static_cast<LChar>(matchCharacter), ind
ex); |
| 570 | 629 } |
| 571 template<typename CharacterType> | 630 |
| 572 inline size_t findNextLineStart(const CharacterType* characters, unsigned length
, unsigned index = 0) | 631 inline size_t StringImpl::find(LChar character, unsigned start) { |
| 573 { | 632 if (is8Bit()) |
| 574 while (index < length) { | 633 return WTF::find(characters8(), m_length, character, start); |
| 575 CharacterType c = characters[index++]; | 634 return WTF::find(characters16(), m_length, character, start); |
| 576 if ((c != '\n') && (c != '\r')) | 635 } |
| 577 continue; | 636 |
| 578 | 637 ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start) { |
| 579 // There can only be a start of a new line if there are more characters | 638 return find(static_cast<LChar>(character), start); |
| 580 // beyond the current character. | 639 } |
| 581 if (index < length) { | 640 |
| 582 // The 3 common types of line terminators are 1. \r\n (Windows), | 641 inline size_t StringImpl::find(UChar character, unsigned start) { |
| 583 // 2. \r (old MacOS) and 3. \n (Unix'es). | 642 if (is8Bit()) |
| 584 | 643 return WTF::find(characters8(), m_length, character, start); |
| 585 if (c == '\n') | 644 return WTF::find(characters16(), m_length, character, start); |
| 586 return index; // Case 3: just \n. | 645 } |
| 587 | 646 |
| 588 CharacterType c2 = characters[index]; | 647 inline unsigned lengthOfNullTerminatedString(const UChar* string) { |
| 589 if (c2 != '\n') | 648 size_t length = 0; |
| 590 return index; // Case 2: just \r. | 649 while (string[length] != UChar(0)) |
| 591 | 650 ++length; |
| 592 // Case 1: \r\n. | 651 RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max()); |
| 593 // But, there's only a start of a new line if there are more | 652 return static_cast<unsigned>(length); |
| 594 // characters beyond the \r\n. | 653 } |
| 595 if (++index < length) | 654 |
| 596 return index; | 655 template <size_t inlineCapacity> |
| 597 } | 656 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
{ |
| 598 } | 657 if (!b) |
| 599 return kNotFound; | 658 return !a.size(); |
| 600 } | 659 if (a.size() != b->length()) |
| 601 | 660 return false; |
| 602 template<typename CharacterType> | 661 if (b->is8Bit()) |
| 603 inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigne
d length, unsigned index = UINT_MAX) | 662 return equal(a.data(), b->characters8(), b->length()); |
| 604 { | 663 return equal(a.data(), b->characters16(), b->length()); |
| 605 if (!length) | 664 } |
| 606 return kNotFound; | 665 |
| 607 if (index >= length) | 666 template <typename CharacterType1, typename CharacterType2> |
| 608 index = length - 1; | 667 static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType
1* c1, const CharacterType2* c2) { |
| 609 CharacterType c = characters[index]; | 668 const unsigned lmin = l1 < l2 ? l1 : l2; |
| 610 while ((c != '\n') && (c != '\r')) { | 669 unsigned pos = 0; |
| 611 if (!index--) | 670 while (pos < lmin && *c1 == *c2) { |
| 612 return kNotFound; | 671 ++c1; |
| 613 c = characters[index]; | 672 ++c2; |
| 614 } | 673 ++pos; |
| 615 return index; | 674 } |
| 616 } | 675 |
| 617 | 676 if (pos < lmin) |
| 618 template<typename CharacterType> | 677 return (c1[0] > c2[0]) ? 1 : -1; |
| 619 inline size_t reverseFind(const CharacterType* characters, unsigned length, Char
acterType matchCharacter, unsigned index = UINT_MAX) | 678 |
| 620 { | 679 if (l1 == l2) |
| 621 if (!length) | 680 return 0; |
| 622 return kNotFound; | 681 |
| 623 if (index >= length) | 682 return (l1 > l2) ? 1 : -1; |
| 624 index = length - 1; | 683 } |
| 625 while (characters[index] != matchCharacter) { | 684 |
| 626 if (!index--) | 685 static inline int codePointCompare8(const StringImpl* string1, const StringImpl*
string2) { |
| 627 return kNotFound; | 686 return codePointCompare(string1->length(), string2->length(), string1->charact
ers8(), string2->characters8()); |
| 628 } | 687 } |
| 629 return index; | 688 |
| 630 } | 689 static inline int codePointCompare16(const StringImpl* string1, const StringImpl
* string2) { |
| 631 | 690 return codePointCompare(string1->length(), string2->length(), string1->charact
ers16(), string2->characters16()); |
| 632 ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar
matchCharacter, unsigned index = UINT_MAX) | 691 } |
| 633 { | 692 |
| 634 return reverseFind(characters, length, static_cast<UChar>(matchCharacter), i
ndex); | 693 static inline int codePointCompare8To16(const StringImpl* string1, const StringI
mpl* string2) { |
| 635 } | 694 return codePointCompare(string1->length(), string2->length(), string1->charact
ers8(), string2->characters16()); |
| 636 | 695 } |
| 637 inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchC
haracter, unsigned index = UINT_MAX) | 696 |
| 638 { | 697 static inline int codePointCompare(const StringImpl* string1, const StringImpl*
string2) { |
| 639 if (matchCharacter & ~0xFF) | 698 if (!string1) |
| 640 return kNotFound; | 699 return (string2 && string2->length()) ? -1 : 0; |
| 641 return reverseFind(characters, length, static_cast<LChar>(matchCharacter), i
ndex); | 700 |
| 642 } | 701 if (!string2) |
| 643 | 702 return string1->length() ? 1 : 0; |
| 644 inline size_t StringImpl::find(LChar character, unsigned start) | 703 |
| 645 { | 704 bool string1Is8Bit = string1->is8Bit(); |
| 646 if (is8Bit()) | 705 bool string2Is8Bit = string2->is8Bit(); |
| 647 return WTF::find(characters8(), m_length, character, start); | 706 if (string1Is8Bit) { |
| 648 return WTF::find(characters16(), m_length, character, start); | |
| 649 } | |
| 650 | |
| 651 ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start) | |
| 652 { | |
| 653 return find(static_cast<LChar>(character), start); | |
| 654 } | |
| 655 | |
| 656 inline size_t StringImpl::find(UChar character, unsigned start) | |
| 657 { | |
| 658 if (is8Bit()) | |
| 659 return WTF::find(characters8(), m_length, character, start); | |
| 660 return WTF::find(characters16(), m_length, character, start); | |
| 661 } | |
| 662 | |
| 663 inline unsigned lengthOfNullTerminatedString(const UChar* string) | |
| 664 { | |
| 665 size_t length = 0; | |
| 666 while (string[length] != UChar(0)) | |
| 667 ++length; | |
| 668 RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max()); | |
| 669 return static_cast<unsigned>(length); | |
| 670 } | |
| 671 | |
| 672 template<size_t inlineCapacity> | |
| 673 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b) | |
| 674 { | |
| 675 if (!b) | |
| 676 return !a.size(); | |
| 677 if (a.size() != b->length()) | |
| 678 return false; | |
| 679 if (b->is8Bit()) | |
| 680 return equal(a.data(), b->characters8(), b->length()); | |
| 681 return equal(a.data(), b->characters16(), b->length()); | |
| 682 } | |
| 683 | |
| 684 template<typename CharacterType1, typename CharacterType2> | |
| 685 static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType
1* c1, const CharacterType2* c2) | |
| 686 { | |
| 687 const unsigned lmin = l1 < l2 ? l1 : l2; | |
| 688 unsigned pos = 0; | |
| 689 while (pos < lmin && *c1 == *c2) { | |
| 690 ++c1; | |
| 691 ++c2; | |
| 692 ++pos; | |
| 693 } | |
| 694 | |
| 695 if (pos < lmin) | |
| 696 return (c1[0] > c2[0]) ? 1 : -1; | |
| 697 | |
| 698 if (l1 == l2) | |
| 699 return 0; | |
| 700 | |
| 701 return (l1 > l2) ? 1 : -1; | |
| 702 } | |
| 703 | |
| 704 static inline int codePointCompare8(const StringImpl* string1, const StringImpl*
string2) | |
| 705 { | |
| 706 return codePointCompare(string1->length(), string2->length(), string1->chara
cters8(), string2->characters8()); | |
| 707 } | |
| 708 | |
| 709 static inline int codePointCompare16(const StringImpl* string1, const StringImpl
* string2) | |
| 710 { | |
| 711 return codePointCompare(string1->length(), string2->length(), string1->chara
cters16(), string2->characters16()); | |
| 712 } | |
| 713 | |
| 714 static inline int codePointCompare8To16(const StringImpl* string1, const StringI
mpl* string2) | |
| 715 { | |
| 716 return codePointCompare(string1->length(), string2->length(), string1->chara
cters8(), string2->characters16()); | |
| 717 } | |
| 718 | |
| 719 static inline int codePointCompare(const StringImpl* string1, const StringImpl*
string2) | |
| 720 { | |
| 721 if (!string1) | |
| 722 return (string2 && string2->length()) ? -1 : 0; | |
| 723 | |
| 724 if (!string2) | |
| 725 return string1->length() ? 1 : 0; | |
| 726 | |
| 727 bool string1Is8Bit = string1->is8Bit(); | |
| 728 bool string2Is8Bit = string2->is8Bit(); | |
| 729 if (string1Is8Bit) { | |
| 730 if (string2Is8Bit) | |
| 731 return codePointCompare8(string1, string2); | |
| 732 return codePointCompare8To16(string1, string2); | |
| 733 } | |
| 734 if (string2Is8Bit) | 707 if (string2Is8Bit) |
| 735 return -codePointCompare8To16(string2, string1); | 708 return codePointCompare8(string1, string2); |
| 736 return codePointCompare16(string1, string2); | 709 return codePointCompare8To16(string1, string2); |
| 737 } | 710 } |
| 738 | 711 if (string2Is8Bit) |
| 739 static inline bool isSpaceOrNewline(UChar c) | 712 return -codePointCompare8To16(string2, string1); |
| 740 { | 713 return codePointCompare16(string1, string2); |
| 741 // Use isASCIISpace() for basic Latin-1. | 714 } |
| 742 // This will include newlines, which aren't included in Unicode DirWS. | 715 |
| 743 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF:
:Unicode::WhiteSpaceNeutral; | 716 static inline bool isSpaceOrNewline(UChar c) { |
| 744 } | 717 // Use isASCIISpace() for basic Latin-1. |
| 745 | 718 // This will include newlines, which aren't included in Unicode DirWS. |
| 746 inline PassRefPtr<StringImpl> StringImpl::isolatedCopy() const | 719 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::U
nicode::WhiteSpaceNeutral; |
| 747 { | 720 } |
| 748 if (is8Bit()) | 721 |
| 749 return create(characters8(), m_length); | 722 inline PassRefPtr<StringImpl> StringImpl::isolatedCopy() const { |
| 750 return create(characters16(), m_length); | 723 if (is8Bit()) |
| 724 return create(characters8(), m_length); |
| 725 return create(characters16(), m_length); |
| 751 } | 726 } |
| 752 | 727 |
| 753 // TODO(rob.buis) possibly find a better place for this method. | 728 // TODO(rob.buis) possibly find a better place for this method. |
| 754 // Turns a UChar32 to uppercase based on localeIdentifier. | 729 // Turns a UChar32 to uppercase based on localeIdentifier. |
| 755 WTF_EXPORT UChar32 toUpper(UChar32, const AtomicString& localeIdentifier); | 730 WTF_EXPORT UChar32 toUpper(UChar32, const AtomicString& localeIdentifier); |
| 756 | 731 |
| 757 struct StringHash; | 732 struct StringHash; |
| 758 | 733 |
| 759 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> | 734 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> |
| 760 template<typename T> struct DefaultHash; | 735 template <typename T> |
| 761 template<> struct DefaultHash<StringImpl*> { | 736 struct DefaultHash; |
| 762 typedef StringHash Hash; | 737 template <> |
| 738 struct DefaultHash<StringImpl*> { |
| 739 typedef StringHash Hash; |
| 763 }; | 740 }; |
| 764 template<> struct DefaultHash<RefPtr<StringImpl>> { | 741 template <> |
| 765 typedef StringHash Hash; | 742 struct DefaultHash<RefPtr<StringImpl>> { |
| 743 typedef StringHash Hash; |
| 766 }; | 744 }; |
| 767 | |
| 768 } | 745 } |
| 769 | 746 |
| 770 using WTF::StringImpl; | 747 using WTF::StringImpl; |
| 771 using WTF::equal; | 748 using WTF::equal; |
| 772 using WTF::equalNonNull; | 749 using WTF::equalNonNull; |
| 773 using WTF::TextCaseSensitivity; | 750 using WTF::TextCaseSensitivity; |
| 774 using WTF::TextCaseSensitive; | 751 using WTF::TextCaseSensitive; |
| 775 using WTF::TextCaseInsensitive; | 752 using WTF::TextCaseInsensitive; |
| 776 | 753 |
| 777 #endif | 754 #endif |
| OLD | NEW |