Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(156)

Side by Side Diff: third_party/WebKit/Source/wtf/text/StringImpl.h

Issue 1611343002: wtf reformat test Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: pydent Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights reserved. 3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Google Inc. All rights reserved. 4 * Copyright (C) 2009 Google Inc. All rights reserved.
5 * 5 *
6 * This library is free software; you can redistribute it and/or 6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public 7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either 8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version. 9 * version 2 of the License, or (at your option) any later version.
10 * 10 *
(...skipping 15 matching lines...) Expand all
26 #include "wtf/ASCIICType.h" 26 #include "wtf/ASCIICType.h"
27 #include "wtf/Forward.h" 27 #include "wtf/Forward.h"
28 #include "wtf/HashMap.h" 28 #include "wtf/HashMap.h"
29 #include "wtf/StringHasher.h" 29 #include "wtf/StringHasher.h"
30 #include "wtf/Vector.h" 30 #include "wtf/Vector.h"
31 #include "wtf/WTFExport.h" 31 #include "wtf/WTFExport.h"
32 #include "wtf/text/Unicode.h" 32 #include "wtf/text/Unicode.h"
33 #include <limits.h> 33 #include <limits.h>
34 34
35 #if OS(MACOSX) 35 #if OS(MACOSX)
36 typedef const struct __CFString * CFStringRef; 36 typedef const struct __CFString* CFStringRef;
37 #endif 37 #endif
38 38
39 #ifdef __OBJC__ 39 #ifdef __OBJC__
40 @class NSString; 40 @class NSString;
41 #endif 41 #endif
42 42
43 namespace WTF { 43 namespace WTF {
44 44
45 struct AlreadyHashed; 45 struct AlreadyHashed;
46 struct CStringTranslator; 46 struct CStringTranslator;
47 template<typename CharacterType> struct HashAndCharactersTranslator; 47 template <typename CharacterType>
48 struct HashAndCharactersTranslator;
48 struct HashAndUTF8CharactersTranslator; 49 struct HashAndUTF8CharactersTranslator;
49 struct LCharBufferTranslator; 50 struct LCharBufferTranslator;
50 struct CharBufferFromLiteralDataTranslator; 51 struct CharBufferFromLiteralDataTranslator;
51 struct SubstringTranslator; 52 struct SubstringTranslator;
52 struct UCharBufferTranslator; 53 struct UCharBufferTranslator;
53 template<typename> class RetainPtr; 54 template <typename>
55 class RetainPtr;
54 56
55 enum TextCaseSensitivity { TextCaseSensitive, TextCaseASCIIInsensitive, TextCaseInsensitive }; 57 enum TextCaseSensitivity {
58 TextCaseSensitive,
59 TextCaseASCIIInsensitive,
60 TextCaseInsensitive
61 };
56 62
57 enum StripBehavior { StripExtraWhiteSpace, DoNotStripWhiteSpace }; 63 enum StripBehavior { StripExtraWhiteSpace, DoNotStripWhiteSpace };
58 64
59 typedef bool (*CharacterMatchFunctionPtr)(UChar); 65 typedef bool (*CharacterMatchFunctionPtr)(UChar);
60 typedef bool (*IsWhiteSpaceFunctionPtr)(UChar); 66 typedef bool (*IsWhiteSpaceFunctionPtr)(UChar);
61 typedef HashMap<unsigned, StringImpl*, AlreadyHashed> StaticStringsTable; 67 typedef HashMap<unsigned, StringImpl*, AlreadyHashed> StaticStringsTable;
62 68
63 // Define STRING_STATS to turn on run time statistics of string sizes and memory usage 69 // Define STRING_STATS to turn on run time statistics of string sizes and memory usage
64 #undef STRING_STATS 70 #undef STRING_STATS
65 71
66 #ifdef STRING_STATS 72 #ifdef STRING_STATS
67 struct StringStats { 73 struct StringStats {
68 inline void add8BitString(unsigned length) 74 inline void add8BitString(unsigned length) {
69 { 75 ++m_totalNumberStrings;
70 ++m_totalNumberStrings; 76 ++m_number8BitStrings;
71 ++m_number8BitStrings; 77 m_total8BitData += length;
72 m_total8BitData += length; 78 }
73 }
74 79
75 inline void add16BitString(unsigned length) 80 inline void add16BitString(unsigned length) {
76 { 81 ++m_totalNumberStrings;
77 ++m_totalNumberStrings; 82 ++m_number16BitStrings;
78 ++m_number16BitStrings; 83 m_total16BitData += length;
79 m_total16BitData += length; 84 }
80 }
81 85
82 void removeString(StringImpl*); 86 void removeString(StringImpl*);
83 void printStats(); 87 void printStats();
84 88
85 static const unsigned s_printStringStatsFrequency = 5000; 89 static const unsigned s_printStringStatsFrequency = 5000;
86 static unsigned s_stringRemovesTillPrintStats; 90 static unsigned s_stringRemovesTillPrintStats;
87 91
88 unsigned m_totalNumberStrings; 92 unsigned m_totalNumberStrings;
89 unsigned m_number8BitStrings; 93 unsigned m_number8BitStrings;
90 unsigned m_number16BitStrings; 94 unsigned m_number16BitStrings;
91 unsigned long long m_total8BitData; 95 unsigned long long m_total8BitData;
92 unsigned long long m_total16BitData; 96 unsigned long long m_total16BitData;
93 }; 97 };
94 98
95 void addStringForStats(StringImpl*); 99 void addStringForStats(StringImpl*);
96 void removeStringForStats(StringImpl*); 100 void removeStringForStats(StringImpl*);
97 101
98 #define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length); addStringForStats(this) 102 #define STRING_STATS_ADD_8BIT_STRING(length) \
99 #define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length); addStringForStats(this) 103 StringImpl::stringStats().add8BitString(length); \
100 #define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string); removeStringForStats(this) 104 addStringForStats(this)
105 #define STRING_STATS_ADD_16BIT_STRING(length) \
106 StringImpl::stringStats().add16BitString(length); \
107 addStringForStats(this)
108 #define STRING_STATS_REMOVE_STRING(string) \
109 StringImpl::stringStats().removeString(string); \
110 removeStringForStats(this)
101 #else 111 #else
102 #define STRING_STATS_ADD_8BIT_STRING(length) ((void)0) 112 #define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
103 #define STRING_STATS_ADD_16BIT_STRING(length) ((void)0) 113 #define STRING_STATS_ADD_16BIT_STRING(length) ((void)0)
104 #define STRING_STATS_REMOVE_STRING(string) ((void)0) 114 #define STRING_STATS_REMOVE_STRING(string) ((void)0)
105 #endif 115 #endif
106 116
107 // You can find documentation about this class in this doc: 117 // You can find documentation about this class in this doc:
108 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl14/edit?usp=sharing 118 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl14/edit?usp=sharing
109 class WTF_EXPORT StringImpl { 119 class WTF_EXPORT StringImpl {
110 WTF_MAKE_NONCOPYABLE(StringImpl); 120 WTF_MAKE_NONCOPYABLE(StringImpl);
111 friend struct WTF::CStringTranslator; 121 friend struct WTF::CStringTranslator;
112 template<typename CharacterType> friend struct WTF::HashAndCharactersTranslator; 122 template <typename CharacterType>
113 friend struct WTF::HashAndUTF8CharactersTranslator; 123 friend struct WTF::HashAndCharactersTranslator;
114 friend struct WTF::CharBufferFromLiteralDataTranslator; 124 friend struct WTF::HashAndUTF8CharactersTranslator;
115 friend struct WTF::LCharBufferTranslator; 125 friend struct WTF::CharBufferFromLiteralDataTranslator;
116 friend struct WTF::SubstringTranslator; 126 friend struct WTF::LCharBufferTranslator;
117 friend struct WTF::UCharBufferTranslator; 127 friend struct WTF::SubstringTranslator;
118 128 friend struct WTF::UCharBufferTranslator;
119 private: 129
120 // StringImpls are allocated out of the WTF buffer partition. 130 private:
121 void* operator new(size_t); 131 // StringImpls are allocated out of the WTF buffer partition.
122 void* operator new(size_t, void* ptr) { return ptr; } 132 void* operator new(size_t);
123 void operator delete(void*); 133 void* operator new(size_t, void* ptr) { return ptr; }
124 134 void operator delete(void*);
125 // Used to construct static strings, which have an special refCount that can 135
126 // never hit zero. This means that the static string will never be 136 // Used to construct static strings, which have an special refCount that can
127 // destroyed, which is important because static strings will be shared 137 // never hit zero. This means that the static string will never be
128 // across threads & ref-counted in a non-threadsafe manner. 138 // destroyed, which is important because static strings will be shared
129 enum ConstructEmptyStringTag { ConstructEmptyString }; 139 // across threads & ref-counted in a non-threadsafe manner.
130 explicit StringImpl(ConstructEmptyStringTag) 140 enum ConstructEmptyStringTag { ConstructEmptyString };
131 : m_refCount(1) 141 explicit StringImpl(ConstructEmptyStringTag)
132 , m_length(0) 142 : m_refCount(1),
133 , m_hash(0) 143 m_length(0),
134 , m_isAtomic(false) 144 m_hash(0),
135 , m_is8Bit(true) 145 m_isAtomic(false),
136 , m_isStatic(true) 146 m_is8Bit(true),
137 { 147 m_isStatic(true) {
138 // Ensure that the hash is computed so that AtomicStringHash can call 148 // Ensure that the hash is computed so that AtomicStringHash can call
139 // existingHash() with impunity. The empty string is special because it 149 // existingHash() with impunity. The empty string is special because it
140 // is never entered into AtomicString's HashKey, but still needs to 150 // is never entered into AtomicString's HashKey, but still needs to
141 // compare correctly. 151 // compare correctly.
142 STRING_STATS_ADD_8BIT_STRING(m_length); 152 STRING_STATS_ADD_8BIT_STRING(m_length);
143 hash(); 153 hash();
154 }
155
156 enum ConstructEmptyString16BitTag { ConstructEmptyString16Bit };
157 explicit StringImpl(ConstructEmptyString16BitTag)
158 : m_refCount(1),
159 m_length(0),
160 m_hash(0),
161 m_isAtomic(false),
162 m_is8Bit(false),
163 m_isStatic(true) {
164 STRING_STATS_ADD_16BIT_STRING(m_length);
165 hash();
166 }
167
168 // FIXME: there has to be a less hacky way to do this.
169 enum Force8Bit { Force8BitConstructor };
170 StringImpl(unsigned length, Force8Bit)
171 : m_refCount(1),
172 m_length(length),
173 m_hash(0),
174 m_isAtomic(false),
175 m_is8Bit(true),
176 m_isStatic(false) {
177 ASSERT(m_length);
178 STRING_STATS_ADD_8BIT_STRING(m_length);
179 }
180
181 StringImpl(unsigned length)
182 : m_refCount(1),
183 m_length(length),
184 m_hash(0),
185 m_isAtomic(false),
186 m_is8Bit(false),
187 m_isStatic(false) {
188 ASSERT(m_length);
189 STRING_STATS_ADD_16BIT_STRING(m_length);
190 }
191
192 enum StaticStringTag { StaticString };
193 StringImpl(unsigned length, unsigned hash, StaticStringTag)
194 : m_refCount(1),
195 m_length(length),
196 m_hash(hash),
197 m_isAtomic(false),
198 m_is8Bit(true),
199 m_isStatic(true) {}
200
201 public:
202 ~StringImpl();
203
204 static StringImpl* createStatic(const char* string,
205 unsigned length,
206 unsigned hash);
207 static void reserveStaticStringsCapacityForSize(unsigned size);
208 static void freezeStaticStrings();
209 static const StaticStringsTable& allStaticStrings();
210 static unsigned highestStaticStringLength() {
211 return m_highestStaticStringLength;
212 }
213
214 static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
215 static PassRefPtr<StringImpl> create(const LChar*, unsigned length);
216 static PassRefPtr<StringImpl> create8BitIfPossible(const UChar*,
217 unsigned length);
218 template <size_t inlineCapacity>
219 static PassRefPtr<StringImpl> create8BitIfPossible(
220 const Vector<UChar, inlineCapacity>& vector) {
221 return create8BitIfPossible(vector.data(), vector.size());
222 }
223
224 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s,
225 unsigned length) {
226 return create(reinterpret_cast<const LChar*>(s), length);
227 }
228 static PassRefPtr<StringImpl> create(const LChar*);
229 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s) {
230 return create(reinterpret_cast<const LChar*>(s));
231 }
232
233 static PassRefPtr<StringImpl> createUninitialized(unsigned length,
234 LChar*& data);
235 static PassRefPtr<StringImpl> createUninitialized(unsigned length,
236 UChar*& data);
237
238 unsigned length() const { return m_length; }
239 bool is8Bit() const { return m_is8Bit; }
240
241 ALWAYS_INLINE const LChar* characters8() const {
242 ASSERT(is8Bit());
243 return reinterpret_cast<const LChar*>(this + 1);
244 }
245 ALWAYS_INLINE const UChar* characters16() const {
246 ASSERT(!is8Bit());
247 return reinterpret_cast<const UChar*>(this + 1);
248 }
249
250 template <typename CharType>
251 ALWAYS_INLINE const CharType* getCharacters() const;
252
253 size_t sizeInBytes() const;
254
255 bool isAtomic() const { return m_isAtomic; }
256 void setIsAtomic(bool isAtomic) { m_isAtomic = isAtomic; }
257
258 bool isStatic() const { return m_isStatic; }
259
260 private:
261 // The high bits of 'hash' are always empty, but we prefer to store our
262 // flags in the low bits because it makes them slightly more efficient to
263 // access. So, we shift left and right when setting and getting our hash
264 // code.
265 void setHash(unsigned hash) const {
266 ASSERT(!hasHash());
267 // Multiple clients assume that StringHasher is the canonical string
268 // hash function.
269 ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(
270 characters8(), m_length)
271 : StringHasher::computeHashAndMaskTop8Bits(
272 characters16(), m_length)));
273 m_hash = hash;
274 ASSERT(hash); // Verify that 0 is a valid sentinel hash value.
275 }
276
277 unsigned rawHash() const { return m_hash; }
278
279 void destroyIfNotStatic();
280
281 public:
282 bool hasHash() const { return rawHash() != 0; }
283
284 unsigned existingHash() const {
285 ASSERT(hasHash());
286 return rawHash();
287 }
288
289 unsigned hash() const {
290 if (hasHash())
291 return existingHash();
292 return hashSlowCase();
293 }
294
295 ALWAYS_INLINE bool hasOneRef() const { return m_refCount == 1; }
296
297 ALWAYS_INLINE void ref() { ++m_refCount; }
298
299 ALWAYS_INLINE void deref() {
300 if (hasOneRef()) {
301 destroyIfNotStatic();
302 return;
144 } 303 }
145 304
146 enum ConstructEmptyString16BitTag { ConstructEmptyString16Bit }; 305 --m_refCount;
147 explicit StringImpl(ConstructEmptyString16BitTag) 306 }
148 : m_refCount(1) 307
149 , m_length(0) 308 static StringImpl* empty();
150 , m_hash(0) 309 static StringImpl* empty16Bit();
151 , m_isAtomic(false) 310
152 , m_is8Bit(false) 311 // FIXME: Does this really belong in StringImpl?
153 , m_isStatic(true) 312 template <typename T>
154 { 313 static void copyChars(T* destination,
155 STRING_STATS_ADD_16BIT_STRING(m_length); 314 const T* source,
156 hash(); 315 unsigned numCharacters) {
157 } 316 memcpy(destination, source, numCharacters * sizeof(T));
158 317 }
159 // FIXME: there has to be a less hacky way to do this. 318
160 enum Force8Bit { Force8BitConstructor }; 319 ALWAYS_INLINE static void copyChars(UChar* destination,
161 StringImpl(unsigned length, Force8Bit) 320 const LChar* source,
162 : m_refCount(1) 321 unsigned numCharacters) {
163 , m_length(length) 322 for (unsigned i = 0; i < numCharacters; ++i)
164 , m_hash(0) 323 destination[i] = source[i];
165 , m_isAtomic(false) 324 }
166 , m_is8Bit(true) 325
167 , m_isStatic(false) 326 // Some string features, like refcounting and the atomicity flag, are not
168 { 327 // thread-safe. We achieve thread safety by isolation, giving each thread
169 ASSERT(m_length); 328 // its own copy of the string.
170 STRING_STATS_ADD_8BIT_STRING(m_length); 329 PassRefPtr<StringImpl> isolatedCopy() const;
171 } 330
172 331 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
173 StringImpl(unsigned length) 332
174 : m_refCount(1) 333 UChar operator[](unsigned i) const {
175 , m_length(length) 334 ASSERT_WITH_SECURITY_IMPLICATION(i < m_length);
176 , m_hash(0) 335 if (is8Bit())
177 , m_isAtomic(false) 336 return characters8()[i];
178 , m_is8Bit(false) 337 return characters16()[i];
179 , m_isStatic(false) 338 }
180 { 339 UChar32 characterStartingAt(unsigned);
181 ASSERT(m_length); 340
182 STRING_STATS_ADD_16BIT_STRING(m_length); 341 bool containsOnlyWhitespace();
183 } 342
184 343 int toIntStrict(bool* ok = 0, int base = 10);
185 enum StaticStringTag { StaticString }; 344 unsigned toUIntStrict(bool* ok = 0, int base = 10);
186 StringImpl(unsigned length, unsigned hash, StaticStringTag) 345 int64_t toInt64Strict(bool* ok = 0, int base = 10);
187 : m_refCount(1) 346 uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
188 , m_length(length) 347
189 , m_hash(hash) 348 int toInt(bool* ok = 0); // ignores trailing garbage
190 , m_isAtomic(false) 349 unsigned toUInt(bool* ok = 0); // ignores trailing garbage
191 , m_is8Bit(true) 350 int64_t toInt64(bool* ok = 0); // ignores trailing garbage
192 , m_isStatic(true) 351 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
193 { 352
194 } 353 // FIXME: Like the strict functions above, these give false for "ok" when
195 354 // there is trailing garbage. Like the non-strict functions above, these
196 public: 355 // return the value when there is trailing garbage. It would be better if
197 ~StringImpl(); 356 // these were more consistent with the above functions instead.
198 357 double toDouble(bool* ok = 0);
199 static StringImpl* createStatic(const char* string, unsigned length, unsigned hash); 358 float toFloat(bool* ok = 0);
200 static void reserveStaticStringsCapacityForSize(unsigned size); 359
201 static void freezeStaticStrings(); 360 PassRefPtr<StringImpl> lower();
202 static const StaticStringsTable& allStaticStrings(); 361 PassRefPtr<StringImpl> lowerASCII();
203 static unsigned highestStaticStringLength() { return m_highestStaticStringLength; } 362 PassRefPtr<StringImpl> upper();
204 363 PassRefPtr<StringImpl> lower(const AtomicString& localeIdentifier);
205 static PassRefPtr<StringImpl> create(const UChar*, unsigned length); 364 PassRefPtr<StringImpl> upper(const AtomicString& localeIdentifier);
206 static PassRefPtr<StringImpl> create(const LChar*, unsigned length); 365
207 static PassRefPtr<StringImpl> create8BitIfPossible(const UChar*, unsigned length); 366 PassRefPtr<StringImpl> fill(UChar);
208 template<size_t inlineCapacity> 367 // FIXME: Do we need fill(char) or can we just do the right thing if UChar is ASCII?
209 static PassRefPtr<StringImpl> create8BitIfPossible(const Vector<UChar, inlin eCapacity>& vector) 368 PassRefPtr<StringImpl> foldCase();
210 { 369
211 return create8BitIfPossible(vector.data(), vector.size()); 370 PassRefPtr<StringImpl> stripWhiteSpace();
212 } 371 PassRefPtr<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr);
213 372 PassRefPtr<StringImpl> simplifyWhiteSpace(
214 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); } 373 StripBehavior = StripExtraWhiteSpace);
215 static PassRefPtr<StringImpl> create(const LChar*); 374 PassRefPtr<StringImpl> simplifyWhiteSpace(
216 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); } 375 IsWhiteSpaceFunctionPtr,
217 376 StripBehavior = StripExtraWhiteSpace);
218 static PassRefPtr<StringImpl> createUninitialized(unsigned length, LChar*& data); 377
219 static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data); 378 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
220 379 template <typename CharType>
221 unsigned length() const { return m_length; } 380 ALWAYS_INLINE PassRefPtr<StringImpl> removeCharacters(
222 bool is8Bit() const { return m_is8Bit; } 381 const CharType* characters,
223 382 CharacterMatchFunctionPtr);
224 ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return reinterpret_cast<const LChar*>(this + 1); } 383
225 ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return reinterpret_cast<const UChar*>(this + 1); } 384 size_t find(LChar character, unsigned start = 0);
226 385 size_t find(char character, unsigned start = 0);
227 template <typename CharType> 386 size_t find(UChar character, unsigned start = 0);
228 ALWAYS_INLINE const CharType * getCharacters() const; 387 size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
229 388 size_t find(const LChar*, unsigned index = 0);
230 size_t sizeInBytes() const; 389 ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) {
231 390 return find(reinterpret_cast<const LChar*>(s), index);
232 bool isAtomic() const { return m_isAtomic; } 391 }
233 void setIsAtomic(bool isAtomic) { m_isAtomic = isAtomic; } 392 size_t find(StringImpl*);
234 393 size_t find(StringImpl*, unsigned index);
235 bool isStatic() const { return m_isStatic; } 394 size_t findIgnoringCase(const LChar*, unsigned index = 0);
236 395 ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) {
237 private: 396 return findIgnoringCase(reinterpret_cast<const LChar*>(s), index);
238 // The high bits of 'hash' are always empty, but we prefer to store our 397 }
239 // flags in the low bits because it makes them slightly more efficient to 398 size_t findIgnoringCase(StringImpl*, unsigned index = 0);
240 // access. So, we shift left and right when setting and getting our hash 399 size_t findIgnoringASCIICase(StringImpl*, unsigned index = 0);
241 // code. 400
242 void setHash(unsigned hash) const 401 size_t findNextLineStart(unsigned index = UINT_MAX);
243 { 402
244 ASSERT(!hasHash()); 403 size_t reverseFind(UChar, unsigned index = UINT_MAX);
245 // Multiple clients assume that StringHasher is the canonical string 404 size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
246 // hash function. 405
247 ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(characters8(), m_length) : StringHasher::computeHashAndMaskTop8Bits(characters16(), m_length))); 406 size_t count(LChar) const;
248 m_hash = hash; 407
249 ASSERT(hash); // Verify that 0 is a valid sentinel hash value. 408 bool startsWith(UChar) const;
250 } 409 bool startsWith(const char*, unsigned prefixLength) const;
251 410 bool startsWith(const StringImpl*) const;
252 unsigned rawHash() const 411 bool startsWithIgnoringCase(const char*, unsigned prefixLength) const;
253 { 412 bool startsWithIgnoringCase(const StringImpl*) const;
254 return m_hash; 413 bool startsWithIgnoringASCIICase(const char*, unsigned prefixLength) const;
255 } 414 bool startsWithIgnoringASCIICase(const StringImpl*) const;
256 415
257 void destroyIfNotStatic(); 416 bool endsWith(UChar) const;
258 417 bool endsWith(const char*, unsigned suffixLength) const;
259 public: 418 bool endsWith(const StringImpl*) const;
260 bool hasHash() const 419 bool endsWithIgnoringCase(const char*, unsigned suffixLength) const;
261 { 420 bool endsWithIgnoringCase(const StringImpl*) const;
262 return rawHash() != 0; 421 bool endsWithIgnoringASCIICase(const char*, unsigned suffixLength) const;
263 } 422 bool endsWithIgnoringASCIICase(const StringImpl*) const;
264 423
265 unsigned existingHash() const 424 PassRefPtr<StringImpl> replace(UChar, UChar);
266 { 425 PassRefPtr<StringImpl> replace(UChar, StringImpl*);
267 ASSERT(hasHash()); 426 ALWAYS_INLINE PassRefPtr<StringImpl> replace(UChar pattern,
268 return rawHash(); 427 const char* replacement,
269 } 428 unsigned replacementLength) {
270 429 return replace(pattern, reinterpret_cast<const LChar*>(replacement),
271 unsigned hash() const 430 replacementLength);
272 { 431 }
273 if (hasHash()) 432 PassRefPtr<StringImpl> replace(UChar,
274 return existingHash(); 433 const LChar*,
275 return hashSlowCase(); 434 unsigned replacementLength);
276 } 435 PassRefPtr<StringImpl> replace(UChar,
277 436 const UChar*,
278 ALWAYS_INLINE bool hasOneRef() const 437 unsigned replacementLength);
279 { 438 PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
280 return m_refCount == 1; 439 PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
281 } 440 PassRefPtr<StringImpl> upconvertedString();
282
283 ALWAYS_INLINE void ref()
284 {
285 ++m_refCount;
286 }
287
288 ALWAYS_INLINE void deref()
289 {
290 if (hasOneRef()) {
291 destroyIfNotStatic();
292 return;
293 }
294
295 --m_refCount;
296 }
297
298 static StringImpl* empty();
299 static StringImpl* empty16Bit();
300
301 // FIXME: Does this really belong in StringImpl?
302 template <typename T> static void copyChars(T* destination, const T* source, unsigned numCharacters)
303 {
304 memcpy(destination, source, numCharacters * sizeof(T));
305 }
306
307 ALWAYS_INLINE static void copyChars(UChar* destination, const LChar* source, unsigned numCharacters)
308 {
309 for (unsigned i = 0; i < numCharacters; ++i)
310 destination[i] = source[i];
311 }
312
313 // Some string features, like refcounting and the atomicity flag, are not
314 // thread-safe. We achieve thread safety by isolation, giving each thread
315 // its own copy of the string.
316 PassRefPtr<StringImpl> isolatedCopy() const;
317
318 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
319
320 UChar operator[](unsigned i) const
321 {
322 ASSERT_WITH_SECURITY_IMPLICATION(i < m_length);
323 if (is8Bit())
324 return characters8()[i];
325 return characters16()[i];
326 }
327 UChar32 characterStartingAt(unsigned);
328
329 bool containsOnlyWhitespace();
330
331 int toIntStrict(bool* ok = 0, int base = 10);
332 unsigned toUIntStrict(bool* ok = 0, int base = 10);
333 int64_t toInt64Strict(bool* ok = 0, int base = 10);
334 uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
335
336 int toInt(bool* ok = 0); // ignores trailing garbage
337 unsigned toUInt(bool* ok = 0); // ignores trailing garbage
338 int64_t toInt64(bool* ok = 0); // ignores trailing garbage
339 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
340
341 // FIXME: Like the strict functions above, these give false for "ok" when
342 // there is trailing garbage. Like the non-strict functions above, these
343 // return the value when there is trailing garbage. It would be better if
344 // these were more consistent with the above functions instead.
345 double toDouble(bool* ok = 0);
346 float toFloat(bool* ok = 0);
347
348 PassRefPtr<StringImpl> lower();
349 PassRefPtr<StringImpl> lowerASCII();
350 PassRefPtr<StringImpl> upper();
351 PassRefPtr<StringImpl> lower(const AtomicString& localeIdentifier);
352 PassRefPtr<StringImpl> upper(const AtomicString& localeIdentifier);
353
354 PassRefPtr<StringImpl> fill(UChar);
355 // FIXME: Do we need fill(char) or can we just do the right thing if UChar is ASCII?
356 PassRefPtr<StringImpl> foldCase();
357
358 PassRefPtr<StringImpl> stripWhiteSpace();
359 PassRefPtr<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr);
360 PassRefPtr<StringImpl> simplifyWhiteSpace(StripBehavior = StripExtraWhiteSpace);
361 PassRefPtr<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr, StripBehavior = StripExtraWhiteSpace);
362
363 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
364 template <typename CharType>
365 ALWAYS_INLINE PassRefPtr<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr);
366
367 size_t find(LChar character, unsigned start = 0);
368 size_t find(char character, unsigned start = 0);
369 size_t find(UChar character, unsigned start = 0);
370 size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
371 size_t find(const LChar*, unsigned index = 0);
372 ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) { return find(reinterpret_cast<const LChar*>(s), index); }
373 size_t find(StringImpl*);
374 size_t find(StringImpl*, unsigned index);
375 size_t findIgnoringCase(const LChar*, unsigned index = 0);
376 ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { return findIgnoringCase(reinterpret_cast<const LChar*>(s), index); }
377 size_t findIgnoringCase(StringImpl*, unsigned index = 0);
378 size_t findIgnoringASCIICase(StringImpl*, unsigned index = 0);
379
380 size_t findNextLineStart(unsigned index = UINT_MAX);
381
382 size_t reverseFind(UChar, unsigned index = UINT_MAX);
383 size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
384
385 size_t count(LChar) const;
386
387 bool startsWith(UChar) const;
388 bool startsWith(const char*, unsigned prefixLength) const;
389 bool startsWith(const StringImpl*) const;
390 bool startsWithIgnoringCase(const char*, unsigned prefixLength) const;
391 bool startsWithIgnoringCase(const StringImpl*) const;
392 bool startsWithIgnoringASCIICase(const char*, unsigned prefixLength) const;
393 bool startsWithIgnoringASCIICase(const StringImpl*) const;
394
395 bool endsWith(UChar) const;
396 bool endsWith(const char*, unsigned suffixLength) const;
397 bool endsWith(const StringImpl*) const;
398 bool endsWithIgnoringCase(const char*, unsigned suffixLength) const;
399 bool endsWithIgnoringCase(const StringImpl*) const;
400 bool endsWithIgnoringASCIICase(const char*, unsigned suffixLength) const;
401 bool endsWithIgnoringASCIICase(const StringImpl*) const;
402
403 PassRefPtr<StringImpl> replace(UChar, UChar);
404 PassRefPtr<StringImpl> replace(UChar, StringImpl*);
405 ALWAYS_INLINE PassRefPtr<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); }
406 PassRefPtr<StringImpl> replace(UChar, const LChar*, unsigned replacementLength);
407 PassRefPtr<StringImpl> replace(UChar, const UChar*, unsigned replacementLength);
408 PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
409 PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
410 PassRefPtr<StringImpl> upconvertedString();
411 441
412 #if OS(MACOSX) 442 #if OS(MACOSX)
413 RetainPtr<CFStringRef> createCFString(); 443 RetainPtr<CFStringRef> createCFString();
414 #endif 444 #endif
415 #ifdef __OBJC__ 445 #ifdef __OBJC__
416 operator NSString*(); 446 operator NSString*();
417 #endif 447 #endif
418 448
419 #ifdef STRING_STATS 449 #ifdef STRING_STATS
420 ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; } 450 ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; }
421 #endif 451 #endif
422 static const UChar latin1CaseFoldTable[256]; 452 static const UChar latin1CaseFoldTable[256];
423 453
424 private: 454 private:
425 template<typename CharType> static size_t allocationSize(unsigned length) 455 template <typename CharType>
426 { 456 static size_t allocationSize(unsigned length) {
427 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharType))); 457 RELEASE_ASSERT(
428 return sizeof(StringImpl) + length * sizeof(CharType); 458 length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) /
429 } 459 sizeof(CharType)));
430 460 return sizeof(StringImpl) + length * sizeof(CharType);
431 template <class UCharPredicate> PassRefPtr<StringImpl> stripMatchedCharacters(UCharPredicate); 461 }
432 template <typename CharType, class UCharPredicate> PassRefPtr<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate, StripBehavior); 462
433 NEVER_INLINE unsigned hashSlowCase() const; 463 template <class UCharPredicate>
464 PassRefPtr<StringImpl> stripMatchedCharacters(UCharPredicate);
465 template <typename CharType, class UCharPredicate>
466 PassRefPtr<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate,
467 StripBehavior);
468 NEVER_INLINE unsigned hashSlowCase() const;
434 469
435 #ifdef STRING_STATS 470 #ifdef STRING_STATS
436 static StringStats m_stringStats; 471 static StringStats m_stringStats;
437 #endif 472 #endif
438 473
439 static unsigned m_highestStaticStringLength; 474 static unsigned m_highestStaticStringLength;
440 475
441 #if ENABLE(ASSERT) 476 #if ENABLE(ASSERT)
442 void assertHashIsCorrect() 477 void assertHashIsCorrect() {
443 { 478 ASSERT(hasHash());
444 ASSERT(hasHash()); 479 ASSERT(existingHash() ==
445 ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(charac ters8(), length())); 480 StringHasher::computeHashAndMaskTop8Bits(characters8(), length()));
446 } 481 }
447 #endif 482 #endif
448 483
449 private: 484 private:
450 unsigned m_refCount; 485 unsigned m_refCount;
451 const unsigned m_length; 486 const unsigned m_length;
452 mutable unsigned m_hash : 24; 487 mutable unsigned m_hash : 24;
453 unsigned m_isAtomic : 1; 488 unsigned m_isAtomic : 1;
454 const unsigned m_is8Bit : 1; 489 const unsigned m_is8Bit : 1;
455 const unsigned m_isStatic : 1; 490 const unsigned m_isStatic : 1;
456 }; 491 };
457 492
458 template <> 493 template <>
459 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { return cha racters8(); } 494 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const {
495 return characters8();
496 }
460 497
461 template <> 498 template <>
462 ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { return cha racters16(); } 499 ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const {
500 return characters16();
501 }
463 502
464 WTF_EXPORT bool equal(const StringImpl*, const StringImpl*); 503 WTF_EXPORT bool equal(const StringImpl*, const StringImpl*);
465 WTF_EXPORT bool equal(const StringImpl*, const LChar*); 504 WTF_EXPORT bool equal(const StringImpl*, const LChar*);
466 inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterp ret_cast<const LChar*>(b)); } 505 inline bool equal(const StringImpl* a, const char* b) {
506 return equal(a, reinterpret_cast<const LChar*>(b));
507 }
467 WTF_EXPORT bool equal(const StringImpl*, const LChar*, unsigned); 508 WTF_EXPORT bool equal(const StringImpl*, const LChar*, unsigned);
468 WTF_EXPORT bool equal(const StringImpl*, const UChar*, unsigned); 509 WTF_EXPORT bool equal(const StringImpl*, const UChar*, unsigned);
469 inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); } 510 inline bool equal(const StringImpl* a, const char* b, unsigned length) {
470 inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); } 511 return equal(a, reinterpret_cast<const LChar*>(b), length);
471 inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_ca st<const LChar*>(a)); } 512 }
513 inline bool equal(const LChar* a, StringImpl* b) {
514 return equal(b, a);
515 }
516 inline bool equal(const char* a, StringImpl* b) {
517 return equal(b, reinterpret_cast<const LChar*>(a));
518 }
472 WTF_EXPORT bool equalNonNull(const StringImpl* a, const StringImpl* b); 519 WTF_EXPORT bool equalNonNull(const StringImpl* a, const StringImpl* b);
473 520
474 template<typename CharType> 521 template <typename CharType>
475 ALWAYS_INLINE bool equal(const CharType* a, const CharType* b, unsigned length) { return !memcmp(a, b, length * sizeof(CharType)); } 522 ALWAYS_INLINE bool equal(const CharType* a,
476 523 const CharType* b,
477 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) 524 unsigned length) {
478 { 525 return !memcmp(a, b, length * sizeof(CharType));
479 for (unsigned i = 0; i < length; ++i) { 526 }
480 if (a[i] != b[i]) 527
481 return false; 528 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) {
482 } 529 for (unsigned i = 0; i < length; ++i) {
483 return true; 530 if (a[i] != b[i])
484 } 531 return false;
485 532 }
486 ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { retu rn equal(b, a, length); } 533 return true;
534 }
535
536 ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) {
537 return equal(b, a, length);
538 }
487 539
488 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const StringImpl*); 540 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const StringImpl*);
489 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const LChar*); 541 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const LChar*);
490 inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { return equa lIgnoringCase(b, a); } 542 inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) {
543 return equalIgnoringCase(b, a);
544 }
491 WTF_EXPORT bool equalIgnoringCase(const LChar*, const LChar*, unsigned); 545 WTF_EXPORT bool equalIgnoringCase(const LChar*, const LChar*, unsigned);
492 WTF_EXPORT bool equalIgnoringCase(const UChar*, const LChar*, unsigned); 546 WTF_EXPORT bool equalIgnoringCase(const UChar*, const LChar*, unsigned);
493 inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) { return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); } 547 inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) {
494 inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } 548 return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length);
495 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } 549 }
496 inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } 550 inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) {
497 inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) 551 return equalIgnoringCase(b, a, length);
498 { 552 }
499 ASSERT(length >= 0); 553 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) {
500 return !Unicode::umemcasecmp(a, b, length); 554 return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length);
555 }
556 inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) {
557 return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length);
558 }
559 inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) {
560 ASSERT(length >= 0);
561 return !Unicode::umemcasecmp(a, b, length);
501 } 562 }
502 WTF_EXPORT bool equalIgnoringCaseNonNull(const StringImpl*, const StringImpl*); 563 WTF_EXPORT bool equalIgnoringCaseNonNull(const StringImpl*, const StringImpl*);
503 564
504 WTF_EXPORT bool equalIgnoringNullity(StringImpl*, StringImpl*); 565 WTF_EXPORT bool equalIgnoringNullity(StringImpl*, StringImpl*);
505 566
506 template<typename CharacterTypeA, typename CharacterTypeB> 567 template <typename CharacterTypeA, typename CharacterTypeB>
507 inline bool equalIgnoringASCIICase(const CharacterTypeA* a, const CharacterTypeB * b, unsigned length) 568 inline bool equalIgnoringASCIICase(const CharacterTypeA* a,
508 { 569 const CharacterTypeB* b,
509 for (unsigned i = 0; i < length; ++i) { 570 unsigned length) {
510 if (toASCIILower(a[i]) != toASCIILower(b[i])) 571 for (unsigned i = 0; i < length; ++i) {
511 return false; 572 if (toASCIILower(a[i]) != toASCIILower(b[i]))
573 return false;
574 }
575 return true;
576 }
577
578 template <typename CharacterTypeA, typename CharacterTypeB>
579 inline bool equalIgnoringASCIICase(const CharacterTypeA& a,
580 const CharacterTypeB& b) {
581 unsigned length = b.length();
582 if (a.length() != length)
583 return false;
584 if (a.is8Bit()) {
585 if (b.is8Bit())
586 return equalIgnoringASCIICase(a.characters8(), b.characters8(), length);
587 return equalIgnoringASCIICase(a.characters8(), b.characters16(), length);
588 }
589 if (b.is8Bit())
590 return equalIgnoringASCIICase(a.characters16(), b.characters8(), length);
591 return equalIgnoringASCIICase(a.characters16(), b.characters16(), length);
592 }
593
594 template <typename CharacterType>
595 inline size_t find(const CharacterType* characters,
596 unsigned length,
597 CharacterType matchCharacter,
598 unsigned index = 0) {
599 while (index < length) {
600 if (characters[index] == matchCharacter)
601 return index;
602 ++index;
603 }
604 return kNotFound;
605 }
606
607 ALWAYS_INLINE size_t find(const UChar* characters,
608 unsigned length,
609 LChar matchCharacter,
610 unsigned index = 0) {
611 return find(characters, length, static_cast<UChar>(matchCharacter), index);
612 }
613
614 inline size_t find(const LChar* characters,
615 unsigned length,
616 UChar matchCharacter,
617 unsigned index = 0) {
618 if (matchCharacter & ~0xFF)
619 return kNotFound;
620 return find(characters, length, static_cast<LChar>(matchCharacter), index);
621 }
622
623 inline size_t find(const LChar* characters,
624 unsigned length,
625 CharacterMatchFunctionPtr matchFunction,
626 unsigned index = 0) {
627 while (index < length) {
628 if (matchFunction(characters[index]))
629 return index;
630 ++index;
631 }
632 return kNotFound;
633 }
634
635 inline size_t find(const UChar* characters,
636 unsigned length,
637 CharacterMatchFunctionPtr matchFunction,
638 unsigned index = 0) {
639 while (index < length) {
640 if (matchFunction(characters[index]))
641 return index;
642 ++index;
643 }
644 return kNotFound;
645 }
646
647 template <typename CharacterType>
648 inline size_t findNextLineStart(const CharacterType* characters,
649 unsigned length,
650 unsigned index = 0) {
651 while (index < length) {
652 CharacterType c = characters[index++];
653 if ((c != '\n') && (c != '\r'))
654 continue;
655
656 // There can only be a start of a new line if there are more characters
657 // beyond the current character.
658 if (index < length) {
659 // The 3 common types of line terminators are 1. \r\n (Windows),
660 // 2. \r (old MacOS) and 3. \n (Unix'es).
661
662 if (c == '\n')
663 return index; // Case 3: just \n.
664
665 CharacterType c2 = characters[index];
666 if (c2 != '\n')
667 return index; // Case 2: just \r.
668
669 // Case 1: \r\n.
670 // But, there's only a start of a new line if there are more
671 // characters beyond the \r\n.
672 if (++index < length)
673 return index;
512 } 674 }
513 return true; 675 }
514 } 676 return kNotFound;
515 677 }
516 template<typename CharacterTypeA, typename CharacterTypeB> 678
517 inline bool equalIgnoringASCIICase(const CharacterTypeA& a, const CharacterTypeB & b) 679 template <typename CharacterType>
518 { 680 inline size_t reverseFindLineTerminator(const CharacterType* characters,
519 unsigned length = b.length(); 681 unsigned length,
520 if (a.length() != length) 682 unsigned index = UINT_MAX) {
521 return false; 683 if (!length)
522 if (a.is8Bit()) {
523 if (b.is8Bit())
524 return equalIgnoringASCIICase(a.characters8(), b.characters8(), leng th);
525 return equalIgnoringASCIICase(a.characters8(), b.characters16(), length) ;
526 }
527 if (b.is8Bit())
528 return equalIgnoringASCIICase(a.characters16(), b.characters8(), length) ;
529 return equalIgnoringASCIICase(a.characters16(), b.characters16(), length);
530 }
531
532 template<typename CharacterType>
533 inline size_t find(const CharacterType* characters, unsigned length, CharacterTy pe matchCharacter, unsigned index = 0)
534 {
535 while (index < length) {
536 if (characters[index] == matchCharacter)
537 return index;
538 ++index;
539 }
540 return kNotFound; 684 return kNotFound;
541 } 685 if (index >= length)
542 686 index = length - 1;
543 ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchC haracter, unsigned index = 0) 687 CharacterType c = characters[index];
544 { 688 while ((c != '\n') && (c != '\r')) {
545 return find(characters, length, static_cast<UChar>(matchCharacter), index); 689 if (!index--)
546 } 690 return kNotFound;
547 691 c = characters[index];
548 inline size_t find(const LChar* characters, unsigned length, UChar matchCharacte r, unsigned index = 0) 692 }
549 { 693 return index;
550 if (matchCharacter & ~0xFF) 694 }
551 return kNotFound; 695
552 return find(characters, length, static_cast<LChar>(matchCharacter), index); 696 template <typename CharacterType>
553 } 697 inline size_t reverseFind(const CharacterType* characters,
554 698 unsigned length,
555 inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunct ionPtr matchFunction, unsigned index = 0) 699 CharacterType matchCharacter,
556 { 700 unsigned index = UINT_MAX) {
557 while (index < length) { 701 if (!length)
558 if (matchFunction(characters[index]))
559 return index;
560 ++index;
561 }
562 return kNotFound; 702 return kNotFound;
563 } 703 if (index >= length)
564 704 index = length - 1;
565 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunct ionPtr matchFunction, unsigned index = 0) 705 while (characters[index] != matchCharacter) {
566 { 706 if (!index--)
567 while (index < length) { 707 return kNotFound;
568 if (matchFunction(characters[index])) 708 }
569 return index; 709 return index;
570 ++index; 710 }
571 } 711
712 ALWAYS_INLINE size_t reverseFind(const UChar* characters,
713 unsigned length,
714 LChar matchCharacter,
715 unsigned index = UINT_MAX) {
716 return reverseFind(characters, length, static_cast<UChar>(matchCharacter),
717 index);
718 }
719
720 inline size_t reverseFind(const LChar* characters,
721 unsigned length,
722 UChar matchCharacter,
723 unsigned index = UINT_MAX) {
724 if (matchCharacter & ~0xFF)
572 return kNotFound; 725 return kNotFound;
573 } 726 return reverseFind(characters, length, static_cast<LChar>(matchCharacter),
574 727 index);
575 template<typename CharacterType> 728 }
576 inline size_t findNextLineStart(const CharacterType* characters, unsigned length , unsigned index = 0) 729
577 { 730 inline size_t StringImpl::find(LChar character, unsigned start) {
578 while (index < length) { 731 if (is8Bit())
579 CharacterType c = characters[index++]; 732 return WTF::find(characters8(), m_length, character, start);
580 if ((c != '\n') && (c != '\r')) 733 return WTF::find(characters16(), m_length, character, start);
581 continue; 734 }
582 735
583 // There can only be a start of a new line if there are more characters 736 ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start) {
584 // beyond the current character. 737 return find(static_cast<LChar>(character), start);
585 if (index < length) { 738 }
586 // The 3 common types of line terminators are 1. \r\n (Windows), 739
587 // 2. \r (old MacOS) and 3. \n (Unix'es). 740 inline size_t StringImpl::find(UChar character, unsigned start) {
588 741 if (is8Bit())
589 if (c == '\n') 742 return WTF::find(characters8(), m_length, character, start);
590 return index; // Case 3: just \n. 743 return WTF::find(characters16(), m_length, character, start);
591 744 }
592 CharacterType c2 = characters[index]; 745
593 if (c2 != '\n') 746 inline unsigned lengthOfNullTerminatedString(const UChar* string) {
594 return index; // Case 2: just \r. 747 size_t length = 0;
595 748 while (string[length] != UChar(0))
596 // Case 1: \r\n. 749 ++length;
597 // But, there's only a start of a new line if there are more 750 RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max());
598 // characters beyond the \r\n. 751 return static_cast<unsigned>(length);
599 if (++index < length) 752 }
600 return index; 753
601 } 754 template <size_t inlineCapacity>
602 } 755 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a,
603 return kNotFound; 756 StringImpl* b) {
604 } 757 if (!b)
605 758 return !a.size();
606 template<typename CharacterType> 759 if (a.size() != b->length())
607 inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigne d length, unsigned index = UINT_MAX) 760 return false;
608 { 761 if (b->is8Bit())
609 if (!length) 762 return equal(a.data(), b->characters8(), b->length());
610 return kNotFound; 763 return equal(a.data(), b->characters16(), b->length());
611 if (index >= length) 764 }
612 index = length - 1; 765
613 CharacterType c = characters[index]; 766 template <typename CharacterType1, typename CharacterType2>
614 while ((c != '\n') && (c != '\r')) { 767 static inline int codePointCompare(unsigned l1,
615 if (!index--) 768 unsigned l2,
616 return kNotFound; 769 const CharacterType1* c1,
617 c = characters[index]; 770 const CharacterType2* c2) {
618 } 771 const unsigned lmin = l1 < l2 ? l1 : l2;
619 return index; 772 unsigned pos = 0;
620 } 773 while (pos < lmin && *c1 == *c2) {
621 774 ++c1;
622 template<typename CharacterType> 775 ++c2;
623 inline size_t reverseFind(const CharacterType* characters, unsigned length, Char acterType matchCharacter, unsigned index = UINT_MAX) 776 ++pos;
624 { 777 }
625 if (!length) 778
626 return kNotFound; 779 if (pos < lmin)
627 if (index >= length) 780 return (c1[0] > c2[0]) ? 1 : -1;
628 index = length - 1; 781
629 while (characters[index] != matchCharacter) { 782 if (l1 == l2)
630 if (!index--) 783 return 0;
631 return kNotFound; 784
632 } 785 return (l1 > l2) ? 1 : -1;
633 return index; 786 }
634 } 787
635 788 static inline int codePointCompare8(const StringImpl* string1,
636 ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = UINT_MAX) 789 const StringImpl* string2) {
637 { 790 return codePointCompare(string1->length(), string2->length(),
638 return reverseFind(characters, length, static_cast<UChar>(matchCharacter), i ndex); 791 string1->characters8(), string2->characters8());
639 } 792 }
640 793
641 inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchC haracter, unsigned index = UINT_MAX) 794 static inline int codePointCompare16(const StringImpl* string1,
642 { 795 const StringImpl* string2) {
643 if (matchCharacter & ~0xFF) 796 return codePointCompare(string1->length(), string2->length(),
644 return kNotFound; 797 string1->characters16(), string2->characters16());
645 return reverseFind(characters, length, static_cast<LChar>(matchCharacter), i ndex); 798 }
646 } 799
647 800 static inline int codePointCompare8To16(const StringImpl* string1,
648 inline size_t StringImpl::find(LChar character, unsigned start) 801 const StringImpl* string2) {
649 { 802 return codePointCompare(string1->length(), string2->length(),
650 if (is8Bit()) 803 string1->characters8(), string2->characters16());
651 return WTF::find(characters8(), m_length, character, start); 804 }
652 return WTF::find(characters16(), m_length, character, start); 805
653 } 806 static inline int codePointCompare(const StringImpl* string1,
654 807 const StringImpl* string2) {
655 ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start) 808 if (!string1)
656 { 809 return (string2 && string2->length()) ? -1 : 0;
657 return find(static_cast<LChar>(character), start); 810
658 } 811 if (!string2)
659 812 return string1->length() ? 1 : 0;
660 inline size_t StringImpl::find(UChar character, unsigned start) 813
661 { 814 bool string1Is8Bit = string1->is8Bit();
662 if (is8Bit()) 815 bool string2Is8Bit = string2->is8Bit();
663 return WTF::find(characters8(), m_length, character, start); 816 if (string1Is8Bit) {
664 return WTF::find(characters16(), m_length, character, start);
665 }
666
667 inline unsigned lengthOfNullTerminatedString(const UChar* string)
668 {
669 size_t length = 0;
670 while (string[length] != UChar(0))
671 ++length;
672 RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max());
673 return static_cast<unsigned>(length);
674 }
675
676 template<size_t inlineCapacity>
677 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
678 {
679 if (!b)
680 return !a.size();
681 if (a.size() != b->length())
682 return false;
683 if (b->is8Bit())
684 return equal(a.data(), b->characters8(), b->length());
685 return equal(a.data(), b->characters16(), b->length());
686 }
687
688 template<typename CharacterType1, typename CharacterType2>
689 static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType 1* c1, const CharacterType2* c2)
690 {
691 const unsigned lmin = l1 < l2 ? l1 : l2;
692 unsigned pos = 0;
693 while (pos < lmin && *c1 == *c2) {
694 ++c1;
695 ++c2;
696 ++pos;
697 }
698
699 if (pos < lmin)
700 return (c1[0] > c2[0]) ? 1 : -1;
701
702 if (l1 == l2)
703 return 0;
704
705 return (l1 > l2) ? 1 : -1;
706 }
707
708 static inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2)
709 {
710 return codePointCompare(string1->length(), string2->length(), string1->chara cters8(), string2->characters8());
711 }
712
713 static inline int codePointCompare16(const StringImpl* string1, const StringImpl * string2)
714 {
715 return codePointCompare(string1->length(), string2->length(), string1->chara cters16(), string2->characters16());
716 }
717
718 static inline int codePointCompare8To16(const StringImpl* string1, const StringI mpl* string2)
719 {
720 return codePointCompare(string1->length(), string2->length(), string1->chara cters8(), string2->characters16());
721 }
722
723 static inline int codePointCompare(const StringImpl* string1, const StringImpl* string2)
724 {
725 if (!string1)
726 return (string2 && string2->length()) ? -1 : 0;
727
728 if (!string2)
729 return string1->length() ? 1 : 0;
730
731 bool string1Is8Bit = string1->is8Bit();
732 bool string2Is8Bit = string2->is8Bit();
733 if (string1Is8Bit) {
734 if (string2Is8Bit)
735 return codePointCompare8(string1, string2);
736 return codePointCompare8To16(string1, string2);
737 }
738 if (string2Is8Bit) 817 if (string2Is8Bit)
739 return -codePointCompare8To16(string2, string1); 818 return codePointCompare8(string1, string2);
740 return codePointCompare16(string1, string2); 819 return codePointCompare8To16(string1, string2);
741 } 820 }
742 821 if (string2Is8Bit)
743 static inline bool isSpaceOrNewline(UChar c) 822 return -codePointCompare8To16(string2, string1);
744 { 823 return codePointCompare16(string1, string2);
745 // Use isASCIISpace() for basic Latin-1. 824 }
746 // This will include newlines, which aren't included in Unicode DirWS. 825
747 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF: :Unicode::WhiteSpaceNeutral; 826 static inline bool isSpaceOrNewline(UChar c) {
748 } 827 // Use isASCIISpace() for basic Latin-1.
749 828 // This will include newlines, which aren't included in Unicode DirWS.
750 inline PassRefPtr<StringImpl> StringImpl::isolatedCopy() const 829 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) ==
751 { 830 WTF::Unicode::WhiteSpaceNeutral;
752 if (is8Bit()) 831 }
753 return create(characters8(), m_length); 832
754 return create(characters16(), m_length); 833 inline PassRefPtr<StringImpl> StringImpl::isolatedCopy() const {
834 if (is8Bit())
835 return create(characters8(), m_length);
836 return create(characters16(), m_length);
755 } 837 }
756 838
757 // TODO(rob.buis) possibly find a better place for this method. 839 // TODO(rob.buis) possibly find a better place for this method.
758 // Turns a UChar32 to uppercase based on localeIdentifier. 840 // Turns a UChar32 to uppercase based on localeIdentifier.
759 WTF_EXPORT UChar32 toUpper(UChar32, const AtomicString& localeIdentifier); 841 WTF_EXPORT UChar32 toUpper(UChar32, const AtomicString& localeIdentifier);
760 842
761 struct StringHash; 843 struct StringHash;
762 844
763 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> 845 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
764 template<typename T> struct DefaultHash; 846 template <typename T>
765 template<> struct DefaultHash<StringImpl*> { 847 struct DefaultHash;
766 typedef StringHash Hash; 848 template <>
849 struct DefaultHash<StringImpl*> {
850 typedef StringHash Hash;
767 }; 851 };
768 template<> struct DefaultHash<RefPtr<StringImpl>> { 852 template <>
769 typedef StringHash Hash; 853 struct DefaultHash<RefPtr<StringImpl>> {
854 typedef StringHash Hash;
770 }; 855 };
771
772 } 856 }
773 857
774 using WTF::StringImpl; 858 using WTF::StringImpl;
775 using WTF::equal; 859 using WTF::equal;
776 using WTF::equalNonNull; 860 using WTF::equalNonNull;
777 using WTF::TextCaseSensitivity; 861 using WTF::TextCaseSensitivity;
778 using WTF::TextCaseSensitive; 862 using WTF::TextCaseSensitive;
779 using WTF::TextCaseASCIIInsensitive; 863 using WTF::TextCaseASCIIInsensitive;
780 using WTF::TextCaseInsensitive; 864 using WTF::TextCaseInsensitive;
781 865
782 #endif 866 #endif
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/text/StringHash.h ('k') | third_party/WebKit/Source/wtf/text/StringImpl.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698