Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(861)

Side by Side Diff: third_party/WebKit/Source/wtf/text/StringImpl.cpp

Issue 1611343002: wtf reformat test Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: pydent Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org) 3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) 4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All r ights reserved. 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All r ights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) 6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7 * 7 *
8 * This library is free software; you can redistribute it and/or 8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public 9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either 10 * License as published by the Free Software Foundation; either
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
48 #include "wtf/ThreadingPrimitives.h" 48 #include "wtf/ThreadingPrimitives.h"
49 #include <unistd.h> 49 #include <unistd.h>
50 #endif 50 #endif
51 51
52 using namespace std; 52 using namespace std;
53 53
54 namespace WTF { 54 namespace WTF {
55 55
56 using namespace Unicode; 56 using namespace Unicode;
57 57
58 static_assert(sizeof(StringImpl) == 3 * sizeof(int), "StringImpl should stay sma ll"); 58 static_assert(sizeof(StringImpl) == 3 * sizeof(int),
59 "StringImpl should stay small");
59 60
60 #ifdef STRING_STATS 61 #ifdef STRING_STATS
61 62
62 static Mutex& statsMutex() 63 static Mutex& statsMutex() {
63 { 64 DEFINE_STATIC_LOCAL(Mutex, mutex, ());
64 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); 65 return mutex;
65 return mutex; 66 }
66 } 67
67 68 static HashSet<void*>& liveStrings() {
68 static HashSet<void*>& liveStrings() 69 // Notice that we can't use HashSet<StringImpl*> because then HashSet would de dup identical strings.
69 { 70 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
70 // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings. 71 return strings;
71 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ()); 72 }
72 return strings; 73
73 } 74 void addStringForStats(StringImpl* string) {
74 75 MutexLocker locker(statsMutex());
75 void addStringForStats(StringImpl* string) 76 liveStrings().add(string);
76 { 77 }
77 MutexLocker locker(statsMutex()); 78
78 liveStrings().add(string); 79 void removeStringForStats(StringImpl* string) {
79 } 80 MutexLocker locker(statsMutex());
80 81 liveStrings().remove(string);
81 void removeStringForStats(StringImpl* string) 82 }
82 { 83
83 MutexLocker locker(statsMutex()); 84 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) {
84 liveStrings().remove(string); 85 const unsigned kMaxSnippetLength = 64;
85 } 86 snippet.clear();
86 87
87 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) 88 size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
88 { 89 if (expectedLength == kMaxSnippetLength)
89 const unsigned kMaxSnippetLength = 64; 90 expectedLength += 3; // For the "...".
90 snippet.clear(); 91 ++expectedLength; // For the terminating '\0'.
91 92 snippet.reserveCapacity(expectedLength);
92 size_t expectedLength = std::min(string->length(), kMaxSnippetLength); 93
93 if (expectedLength == kMaxSnippetLength) 94 size_t i;
94 expectedLength += 3; // For the "...". 95 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
95 ++expectedLength; // For the terminating '\0'. 96 UChar c = (*string)[i];
96 snippet.reserveCapacity(expectedLength); 97 if (isASCIIPrintable(c))
97 98 snippet.append(c);
98 size_t i; 99 else
99 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) { 100 snippet.append('?');
100 UChar c = (*string)[i]; 101 }
101 if (isASCIIPrintable(c)) 102 if (i < string->length()) {
102 snippet.append(c); 103 snippet.append('.');
103 else 104 snippet.append('.');
104 snippet.append('?'); 105 snippet.append('.');
105 } 106 }
106 if (i < string->length()) { 107 snippet.append('\0');
107 snippet.append('.'); 108 }
108 snippet.append('.'); 109
109 snippet.append('.'); 110 static bool isUnnecessarilyWide(const StringImpl* string) {
110 } 111 if (string->is8Bit())
111 snippet.append('\0'); 112 return false;
112 } 113 UChar c = 0;
113 114 for (unsigned i = 0; i < string->length(); ++i)
114 static bool isUnnecessarilyWide(const StringImpl* string) 115 c |= (*string)[i] >> 8;
115 { 116 return !c;
116 if (string->is8Bit())
117 return false;
118 UChar c = 0;
119 for (unsigned i = 0; i < string->length(); ++i)
120 c |= (*string)[i] >> 8;
121 return !c;
122 } 117 }
123 118
124 class PerStringStats : public RefCounted<PerStringStats> { 119 class PerStringStats : public RefCounted<PerStringStats> {
125 public: 120 public:
126 static PassRefPtr<PerStringStats> create() 121 static PassRefPtr<PerStringStats> create() {
127 { 122 return adoptRef(new PerStringStats);
128 return adoptRef(new PerStringStats); 123 }
129 } 124
130 125 void add(const StringImpl* string) {
131 void add(const StringImpl* string) 126 ++m_numberOfCopies;
132 { 127 if (!m_length) {
133 ++m_numberOfCopies; 128 m_length = string->length();
134 if (!m_length) { 129 fillWithSnippet(string, m_snippet);
135 m_length = string->length(); 130 }
136 fillWithSnippet(string, m_snippet); 131 if (string->isAtomic())
137 } 132 ++m_numberOfAtomicCopies;
138 if (string->isAtomic()) 133 if (isUnnecessarilyWide(string))
139 ++m_numberOfAtomicCopies; 134 m_unnecessarilyWide = true;
140 if (isUnnecessarilyWide(string)) 135 }
141 m_unnecessarilyWide = true; 136
142 } 137 size_t totalCharacters() const { return m_numberOfCopies * m_length; }
143 138
144 size_t totalCharacters() const 139 void print() {
145 { 140 const char* status = "ok";
146 return m_numberOfCopies * m_length; 141 if (m_unnecessarilyWide)
147 } 142 status = "16";
148 143 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status,
149 void print() 144 m_length, m_snippet.data());
150 { 145 }
151 const char* status = "ok"; 146
152 if (m_unnecessarilyWide) 147 bool m_unnecessarilyWide;
153 status = "16"; 148 unsigned m_numberOfCopies;
154 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data()); 149 unsigned m_length;
155 } 150 unsigned m_numberOfAtomicCopies;
156 151 Vector<char> m_snippet;
157 bool m_unnecessarilyWide; 152
158 unsigned m_numberOfCopies; 153 private:
159 unsigned m_length; 154 PerStringStats()
160 unsigned m_numberOfAtomicCopies; 155 : m_unnecessarilyWide(false),
161 Vector<char> m_snippet; 156 m_numberOfCopies(0),
162 157 m_length(0),
163 private: 158 m_numberOfAtomicCopies(0) {}
164 PerStringStats()
165 : m_unnecessarilyWide(false)
166 , m_numberOfCopies(0)
167 , m_length(0)
168 , m_numberOfAtomicCopies(0)
169 {
170 }
171 }; 159 };
172 160
173 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b) 161 bool operator<(const RefPtr<PerStringStats>& a,
174 { 162 const RefPtr<PerStringStats>& b) {
175 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide) 163 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
176 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide; 164 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
177 if (a->totalCharacters() != b->totalCharacters()) 165 if (a->totalCharacters() != b->totalCharacters())
178 return a->totalCharacters() < b->totalCharacters(); 166 return a->totalCharacters() < b->totalCharacters();
179 if (a->m_numberOfCopies != b->m_numberOfCopies) 167 if (a->m_numberOfCopies != b->m_numberOfCopies)
180 return a->m_numberOfCopies < b->m_numberOfCopies; 168 return a->m_numberOfCopies < b->m_numberOfCopies;
181 if (a->m_length != b->m_length) 169 if (a->m_length != b->m_length)
182 return a->m_length < b->m_length; 170 return a->m_length < b->m_length;
183 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies; 171 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
184 } 172 }
185 173
186 static void printLiveStringStats(void*) 174 static void printLiveStringStats(void*) {
187 { 175 MutexLocker locker(statsMutex());
188 MutexLocker locker(statsMutex()); 176 HashSet<void*>& strings = liveStrings();
189 HashSet<void*>& strings = liveStrings(); 177
190 178 HashMap<StringImpl*, RefPtr<PerStringStats>> stats;
191 HashMap<StringImpl*, RefPtr<PerStringStats>> stats; 179 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end();
192 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) { 180 ++iter) {
193 StringImpl* string = static_cast<StringImpl*>(*iter); 181 StringImpl* string = static_cast<StringImpl*>(*iter);
194 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry = stats.fin d(string); 182 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry =
195 RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringSt ats>(PerStringStats::create()) : entry->value; 183 stats.find(string);
196 value->add(string); 184 RefPtr<PerStringStats> value =
197 stats.set(string, value.release()); 185 entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create())
198 } 186 : entry->value;
199 187 value->add(string);
200 Vector<RefPtr<PerStringStats>> all; 188 stats.set(string, value.release());
201 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter = stats.beg in(); iter != stats.end(); ++iter) 189 }
202 all.append(iter->value); 190
203 191 Vector<RefPtr<PerStringStats>> all;
204 std::sort(all.begin(), all.end()); 192 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter =
205 std::reverse(all.begin(), all.end()); 193 stats.begin();
206 for (size_t i = 0; i < 20 && i < all.size(); ++i) 194 iter != stats.end(); ++iter)
207 all[i]->print(); 195 all.append(iter->value);
196
197 std::sort(all.begin(), all.end());
198 std::reverse(all.begin(), all.end());
199 for (size_t i = 0; i < 20 && i < all.size(); ++i)
200 all[i]->print();
208 } 201 }
209 202
210 StringStats StringImpl::m_stringStats; 203 StringStats StringImpl::m_stringStats;
211 204
212 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printString StatsFrequency; 205 unsigned StringStats::s_stringRemovesTillPrintStats =
213 206 StringStats::s_printStringStatsFrequency;
214 void StringStats::removeString(StringImpl* string) 207
215 { 208 void StringStats::removeString(StringImpl* string) {
216 unsigned length = string->length(); 209 unsigned length = string->length();
217 --m_totalNumberStrings; 210 --m_totalNumberStrings;
218 211
219 if (string->is8Bit()) { 212 if (string->is8Bit()) {
220 --m_number8BitStrings; 213 --m_number8BitStrings;
221 m_total8BitData -= length; 214 m_total8BitData -= length;
222 } else { 215 } else {
223 --m_number16BitStrings; 216 --m_number16BitStrings;
224 m_total16BitData -= length; 217 m_total16BitData -= length;
225 } 218 }
226 219
227 if (!--s_stringRemovesTillPrintStats) { 220 if (!--s_stringRemovesTillPrintStats) {
228 s_stringRemovesTillPrintStats = s_printStringStatsFrequency; 221 s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
229 printStats(); 222 printStats();
230 } 223 }
231 } 224 }
232 225
233 void StringStats::printStats() 226 void StringStats::printStats() {
234 { 227 dataLogF("String stats for process id %d:\n", getpid());
235 dataLogF("String stats for process id %d:\n", getpid()); 228
236 229 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
237 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitDat a; 230 double percent8Bit =
238 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 1 00) / (double)m_totalNumberStrings : 0.0; 231 m_totalNumberStrings
239 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / ( double)m_number8BitStrings : 0.0; 232 ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings
240 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, av erage8bitLength); 233 : 0.0;
241 234 double average8bitLength =
242 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0; 235 m_number8BitStrings
243 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0; 236 ? (double)m_total8BitData / (double)m_number8BitStrings
244 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength); 237 : 0.0;
245 238 dataLogF(
246 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0; 239 "%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length "
247 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2; 240 "%6.1f\n",
248 dataLogF("%8u Total %12llu chars %12llu bytes avg length % 6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLen gth); 241 m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData,
249 unsigned long long totalSavedBytes = m_total8BitData; 242 average8bitLength);
250 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0; 243
251 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings); 244 double percent16Bit =
252 245 m_totalNumberStrings
253 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl); 246 ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings
254 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 10 0; 247 : 0.0;
255 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, o verheadPercent); 248 double average16bitLength =
256 249 m_number16BitStrings
257 internal::callOnMainThread(&printLiveStringStats, nullptr); 250 ? (double)m_total16BitData / (double)m_number16BitStrings
251 : 0.0;
252 dataLogF(
253 "%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length "
254 "%6.1f\n",
255 m_number16BitStrings, percent16Bit, m_total16BitData,
256 m_total16BitData * 2, average16bitLength);
257
258 double averageLength =
259 m_totalNumberStrings
260 ? (double)totalNumberCharacters / (double)m_totalNumberStrings
261 : 0.0;
262 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
263 dataLogF(
264 "%8u Total %12llu chars %12llu bytes avg length "
265 "%6.1f\n",
266 m_totalNumberStrings, totalNumberCharacters, totalDataBytes,
267 averageLength);
268 unsigned long long totalSavedBytes = m_total8BitData;
269 double percentSavings = totalSavedBytes
270 ? ((double)totalSavedBytes * 100) /
271 (double)(totalDataBytes + totalSavedBytes)
272 : 0.0;
273 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes,
274 percentSavings);
275
276 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
277 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
278 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead,
279 overheadPercent);
280
281 internal::callOnMainThread(&printLiveStringStats, nullptr);
258 } 282 }
259 #endif 283 #endif
260 284
261 void* StringImpl::operator new(size_t size) 285 void* StringImpl::operator new(size_t size) {
262 { 286 ASSERT(size == sizeof(StringImpl));
263 ASSERT(size == sizeof(StringImpl)); 287 return Partitions::bufferMalloc(size, "WTF::StringImpl");
264 return Partitions::bufferMalloc(size, "WTF::StringImpl"); 288 }
265 } 289
266 290 void StringImpl::operator delete(void* ptr) {
267 void StringImpl::operator delete(void* ptr) 291 Partitions::bufferFree(ptr);
268 { 292 }
269 Partitions::bufferFree(ptr); 293
270 } 294 inline StringImpl::~StringImpl() {
271 295 ASSERT(!isStatic());
272 inline StringImpl::~StringImpl() 296
273 { 297 STRING_STATS_REMOVE_STRING(this);
274 ASSERT(!isStatic()); 298
275 299 if (isAtomic())
276 STRING_STATS_REMOVE_STRING(this); 300 AtomicString::remove(this);
277 301 }
278 if (isAtomic()) 302
279 AtomicString::remove(this); 303 void StringImpl::destroyIfNotStatic() {
280 } 304 if (!isStatic())
281 305 delete this;
282 void StringImpl::destroyIfNotStatic() 306 }
283 { 307
284 if (!isStatic()) 308 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length,
285 delete this; 309 LChar*& data) {
286 } 310 if (!length) {
287 311 data = 0;
288 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data) 312 return empty();
289 { 313 }
290 if (!length) { 314
291 data = 0; 315 // Allocate a single buffer large enough to contain the StringImpl
292 return empty(); 316 // struct as well as the data which it contains. This removes one
293 } 317 // heap allocation from this call.
294 318 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(
295 // Allocate a single buffer large enough to contain the StringImpl 319 allocationSize<LChar>(length), "WTF::StringImpl"));
296 // struct as well as the data which it contains. This removes one 320
297 // heap allocation from this call. 321 data = reinterpret_cast<LChar*>(string + 1);
298 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(alloc ationSize<LChar>(length), "WTF::StringImpl")); 322 return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
299 323 }
300 data = reinterpret_cast<LChar*>(string + 1); 324
301 return adoptRef(new (string) StringImpl(length, Force8BitConstructor)); 325 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length,
302 } 326 UChar*& data) {
303 327 if (!length) {
304 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) 328 data = 0;
305 { 329 return empty();
306 if (!length) { 330 }
307 data = 0; 331
308 return empty(); 332 // Allocate a single buffer large enough to contain the StringImpl
309 } 333 // struct as well as the data which it contains. This removes one
310 334 // heap allocation from this call.
311 // Allocate a single buffer large enough to contain the StringImpl 335 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(
312 // struct as well as the data which it contains. This removes one 336 allocationSize<UChar>(length), "WTF::StringImpl"));
313 // heap allocation from this call. 337
314 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(alloc ationSize<UChar>(length), "WTF::StringImpl")); 338 data = reinterpret_cast<UChar*>(string + 1);
315 339 return adoptRef(new (string) StringImpl(length));
316 data = reinterpret_cast<UChar*>(string + 1); 340 }
317 return adoptRef(new (string) StringImpl(length)); 341
318 } 342 static StaticStringsTable& staticStrings() {
319 343 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
320 static StaticStringsTable& staticStrings() 344 return staticStrings;
321 {
322 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
323 return staticStrings;
324 } 345 }
325 346
326 #if ENABLE(ASSERT) 347 #if ENABLE(ASSERT)
327 static bool s_allowCreationOfStaticStrings = true; 348 static bool s_allowCreationOfStaticStrings = true;
328 #endif 349 #endif
329 350
330 const StaticStringsTable& StringImpl::allStaticStrings() 351 const StaticStringsTable& StringImpl::allStaticStrings() {
331 { 352 return staticStrings();
332 return staticStrings(); 353 }
333 } 354
334 355 void StringImpl::freezeStaticStrings() {
335 void StringImpl::freezeStaticStrings() 356 ASSERT(isMainThread());
336 {
337 ASSERT(isMainThread());
338 357
339 #if ENABLE(ASSERT) 358 #if ENABLE(ASSERT)
340 s_allowCreationOfStaticStrings = false; 359 s_allowCreationOfStaticStrings = false;
341 #endif 360 #endif
342 } 361 }
343 362
344 unsigned StringImpl::m_highestStaticStringLength = 0; 363 unsigned StringImpl::m_highestStaticStringLength = 0;
345 364
346 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsign ed hash) 365 StringImpl* StringImpl::createStatic(const char* string,
347 { 366 unsigned length,
348 ASSERT(s_allowCreationOfStaticStrings); 367 unsigned hash) {
349 ASSERT(string); 368 ASSERT(s_allowCreationOfStaticStrings);
350 ASSERT(length); 369 ASSERT(string);
351 370 ASSERT(length);
352 StaticStringsTable::const_iterator it = staticStrings().find(hash); 371
353 if (it != staticStrings().end()) { 372 StaticStringsTable::const_iterator it = staticStrings().find(hash);
354 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar))); 373 if (it != staticStrings().end()) {
355 return it->value; 374 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
356 } 375 return it->value;
357 376 }
358 // Allocate a single buffer large enough to contain the StringImpl 377
359 // struct as well as the data which it contains. This removes one 378 // Allocate a single buffer large enough to contain the StringImpl
360 // heap allocation from this call. 379 // struct as well as the data which it contains. This removes one
361 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str ingImpl)) / sizeof(LChar))); 380 // heap allocation from this call.
362 size_t size = sizeof(StringImpl) + length * sizeof(LChar); 381 RELEASE_ASSERT(length <=
363 382 ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) /
364 WTF_INTERNAL_LEAK_SANITIZER_DISABLED_SCOPE; 383 sizeof(LChar)));
365 StringImpl* impl = static_cast<StringImpl*>(Partitions::bufferMalloc(size, " WTF::StringImpl")); 384 size_t size = sizeof(StringImpl) + length * sizeof(LChar);
366 385
367 LChar* data = reinterpret_cast<LChar*>(impl + 1); 386 WTF_INTERNAL_LEAK_SANITIZER_DISABLED_SCOPE;
368 impl = new (impl) StringImpl(length, hash, StaticString); 387 StringImpl* impl = static_cast<StringImpl*>(
369 memcpy(data, string, length * sizeof(LChar)); 388 Partitions::bufferMalloc(size, "WTF::StringImpl"));
389
390 LChar* data = reinterpret_cast<LChar*>(impl + 1);
391 impl = new (impl) StringImpl(length, hash, StaticString);
392 memcpy(data, string, length * sizeof(LChar));
370 #if ENABLE(ASSERT) 393 #if ENABLE(ASSERT)
371 impl->assertHashIsCorrect(); 394 impl->assertHashIsCorrect();
372 #endif 395 #endif
373 396
374 ASSERT(isMainThread()); 397 ASSERT(isMainThread());
375 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length); 398 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
376 staticStrings().add(hash, impl); 399 staticStrings().add(hash, impl);
377 WTF_ANNOTATE_BENIGN_RACE(impl, 400 WTF_ANNOTATE_BENIGN_RACE(impl,
378 "Benign race on the reference counter of a static string created by Stri ngImpl::createStatic"); 401 "Benign race on the reference counter of a static "
379 402 "string created by StringImpl::createStatic");
380 return impl; 403
381 } 404 return impl;
382 405 }
383 void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) 406
384 { 407 void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) {
385 ASSERT(s_allowCreationOfStaticStrings); 408 ASSERT(s_allowCreationOfStaticStrings);
386 staticStrings().reserveCapacityForSize(size); 409 staticStrings().reserveCapacityForSize(size);
387 } 410 }
388 411
389 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned leng th) 412 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters,
390 { 413 unsigned length) {
391 if (!characters || !length) 414 if (!characters || !length)
392 return empty(); 415 return empty();
393 416
394 UChar* data; 417 UChar* data;
395 RefPtr<StringImpl> string = createUninitialized(length, data); 418 RefPtr<StringImpl> string = createUninitialized(length, data);
396 memcpy(data, characters, length * sizeof(UChar)); 419 memcpy(data, characters, length * sizeof(UChar));
397 return string.release(); 420 return string.release();
398 } 421 }
399 422
400 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned leng th) 423 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters,
401 { 424 unsigned length) {
402 if (!characters || !length) 425 if (!characters || !length)
403 return empty(); 426 return empty();
404 427
405 LChar* data; 428 LChar* data;
406 RefPtr<StringImpl> string = createUninitialized(length, data); 429 RefPtr<StringImpl> string = createUninitialized(length, data);
407 memcpy(data, characters, length * sizeof(LChar)); 430 memcpy(data, characters, length * sizeof(LChar));
408 return string.release(); 431 return string.release();
409 } 432 }
410 433
411 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length) 434 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters,
412 { 435 unsigned length) {
413 if (!characters || !length) 436 if (!characters || !length)
414 return empty(); 437 return empty();
415 438
416 LChar* data; 439 LChar* data;
417 RefPtr<StringImpl> string = createUninitialized(length, data); 440 RefPtr<StringImpl> string = createUninitialized(length, data);
418 441
419 for (size_t i = 0; i < length; ++i) { 442 for (size_t i = 0; i < length; ++i) {
420 if (characters[i] & 0xff00) 443 if (characters[i] & 0xff00)
421 return create(characters, length); 444 return create(characters, length);
422 data[i] = static_cast<LChar>(characters[i]); 445 data[i] = static_cast<LChar>(characters[i]);
423 } 446 }
424 447
425 return string.release(); 448 return string.release();
426 } 449 }
427 450
428 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) 451 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) {
429 { 452 if (!string)
430 if (!string) 453 return empty();
431 return empty(); 454 size_t length = strlen(reinterpret_cast<const char*>(string));
432 size_t length = strlen(reinterpret_cast<const char*>(string)); 455 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
433 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); 456 return create(string, length);
434 return create(string, length); 457 }
435 } 458
436 459 bool StringImpl::containsOnlyWhitespace() {
437 bool StringImpl::containsOnlyWhitespace() 460 // FIXME: The definition of whitespace here includes a number of characters
438 { 461 // that are not whitespace from the point of view of LayoutText; I wonder if
439 // FIXME: The definition of whitespace here includes a number of characters 462 // that's a problem in practice.
440 // that are not whitespace from the point of view of LayoutText; I wonder if 463 if (is8Bit()) {
441 // that's a problem in practice.
442 if (is8Bit()) {
443 for (unsigned i = 0; i < m_length; ++i) {
444 UChar c = characters8()[i];
445 if (!isASCIISpace(c))
446 return false;
447 }
448
449 return true;
450 }
451
452 for (unsigned i = 0; i < m_length; ++i) { 464 for (unsigned i = 0; i < m_length; ++i) {
453 UChar c = characters16()[i]; 465 UChar c = characters8()[i];
454 if (!isASCIISpace(c)) 466 if (!isASCIISpace(c))
455 return false; 467 return false;
456 } 468 }
469
457 return true; 470 return true;
458 } 471 }
459 472
460 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) 473 for (unsigned i = 0; i < m_length; ++i) {
461 { 474 UChar c = characters16()[i];
462 if (start >= m_length) 475 if (!isASCIISpace(c))
463 return empty(); 476 return false;
464 unsigned maxLength = m_length - start; 477 }
465 if (length >= maxLength) { 478 return true;
466 if (!start) 479 }
467 return this; 480
468 length = maxLength; 481 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) {
469 } 482 if (start >= m_length)
470 if (is8Bit()) 483 return empty();
471 return create(characters8() + start, length); 484 unsigned maxLength = m_length - start;
472 485 if (length >= maxLength) {
473 return create(characters16() + start, length); 486 if (!start)
474 } 487 return this;
475 488 length = maxLength;
476 UChar32 StringImpl::characterStartingAt(unsigned i) 489 }
477 { 490 if (is8Bit())
478 if (is8Bit()) 491 return create(characters8() + start, length);
479 return characters8()[i]; 492
480 if (U16_IS_SINGLE(characters16()[i])) 493 return create(characters16() + start, length);
481 return characters16()[i]; 494 }
482 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(chara cters16()[i + 1])) 495
483 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]); 496 UChar32 StringImpl::characterStartingAt(unsigned i) {
484 return 0; 497 if (is8Bit())
485 } 498 return characters8()[i];
486 499 if (U16_IS_SINGLE(characters16()[i]))
487 PassRefPtr<StringImpl> StringImpl::lowerASCII() 500 return characters16()[i];
488 { 501 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) &&
489 502 U16_IS_TRAIL(characters16()[i + 1]))
490 // First scan the string for uppercase and non-ASCII characters: 503 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
491 if (is8Bit()) { 504 return 0;
492 unsigned firstIndexToBeLowered = m_length; 505 }
493 for (unsigned i = 0; i < m_length; ++i) { 506
494 LChar ch = characters8()[i]; 507 PassRefPtr<StringImpl> StringImpl::lowerASCII() {
495 if (isASCIIUpper(ch)) { 508 // First scan the string for uppercase and non-ASCII characters:
496 firstIndexToBeLowered = i; 509 if (is8Bit()) {
497 break; 510 unsigned firstIndexToBeLowered = m_length;
498 } 511 for (unsigned i = 0; i < m_length; ++i) {
499 } 512 LChar ch = characters8()[i];
500 513 if (isASCIIUpper(ch)) {
501 // Nothing to do if the string is all ASCII with no uppercase. 514 firstIndexToBeLowered = i;
502 if (firstIndexToBeLowered == m_length) { 515 break;
503 return this; 516 }
504 } 517 }
505 518
506 LChar* data8;
507 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
508 memcpy(data8, characters8(), firstIndexToBeLowered);
509
510 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) {
511 LChar ch = characters8()[i];
512 data8[i] = isASCIIUpper(ch) ? toASCIILower(ch) : ch;
513 }
514 return newImpl.release();
515 }
516 bool noUpper = true;
517 UChar ored = 0;
518
519 const UChar* end = characters16() + m_length;
520 for (const UChar* chp = characters16(); chp != end; ++chp) {
521 if (isASCIIUpper(*chp))
522 noUpper = false;
523 ored |= *chp;
524 }
525 // Nothing to do if the string is all ASCII with no uppercase. 519 // Nothing to do if the string is all ASCII with no uppercase.
526 if (noUpper && !(ored & ~0x7F)) 520 if (firstIndexToBeLowered == m_length) {
527 return this; 521 return this;
528 522 }
529 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<unsigned>::m ax())); 523
530 unsigned length = m_length; 524 LChar* data8;
531 525 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
526 memcpy(data8, characters8(), firstIndexToBeLowered);
527
528 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) {
529 LChar ch = characters8()[i];
530 data8[i] = isASCIIUpper(ch) ? toASCIILower(ch) : ch;
531 }
532 return newImpl.release();
533 }
534 bool noUpper = true;
535 UChar ored = 0;
536
537 const UChar* end = characters16() + m_length;
538 for (const UChar* chp = characters16(); chp != end; ++chp) {
539 if (isASCIIUpper(*chp))
540 noUpper = false;
541 ored |= *chp;
542 }
543 // Nothing to do if the string is all ASCII with no uppercase.
544 if (noUpper && !(ored & ~0x7F))
545 return this;
546
547 RELEASE_ASSERT(m_length <=
548 static_cast<unsigned>(numeric_limits<unsigned>::max()));
549 unsigned length = m_length;
550
551 UChar* data16;
552 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
553
554 for (unsigned i = 0; i < length; ++i) {
555 UChar c = characters16()[i];
556 data16[i] = isASCIIUpper(c) ? toASCIILower(c) : c;
557 }
558 return newImpl.release();
559 }
560
561 PassRefPtr<StringImpl> StringImpl::lower() {
562 // Note: This is a hot function in the Dromaeo benchmark, specifically the
563 // no-op code path up through the first 'return' statement.
564
565 // First scan the string for uppercase and non-ASCII characters:
566 if (is8Bit()) {
567 unsigned firstIndexToBeLowered = m_length;
568 for (unsigned i = 0; i < m_length; ++i) {
569 LChar ch = characters8()[i];
570 if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) {
571 firstIndexToBeLowered = i;
572 break;
573 }
574 }
575
576 // Nothing to do if the string is all ASCII with no uppercase.
577 if (firstIndexToBeLowered == m_length)
578 return this;
579
580 LChar* data8;
581 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
582 memcpy(data8, characters8(), firstIndexToBeLowered);
583
584 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) {
585 LChar ch = characters8()[i];
586 data8[i] = UNLIKELY(ch & ~0x7F) ? static_cast<LChar>(Unicode::toLower(ch))
587 : toASCIILower(ch);
588 }
589
590 return newImpl.release();
591 }
592
593 bool noUpper = true;
594 UChar ored = 0;
595
596 const UChar* end = characters16() + m_length;
597 for (const UChar* chp = characters16(); chp != end; ++chp) {
598 if (UNLIKELY(isASCIIUpper(*chp)))
599 noUpper = false;
600 ored |= *chp;
601 }
602 // Nothing to do if the string is all ASCII with no uppercase.
603 if (noUpper && !(ored & ~0x7F))
604 return this;
605
606 RELEASE_ASSERT(m_length <=
607 static_cast<unsigned>(numeric_limits<int32_t>::max()));
608 int32_t length = m_length;
609
610 if (!(ored & ~0x7F)) {
532 UChar* data16; 611 UChar* data16;
533 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 612 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
534 613
535 for (unsigned i = 0; i < length; ++i) { 614 for (int32_t i = 0; i < length; ++i) {
536 UChar c = characters16()[i]; 615 UChar c = characters16()[i];
537 data16[i] = isASCIIUpper(c) ? toASCIILower(c) : c; 616 data16[i] = toASCIILower(c);
538 } 617 }
539 return newImpl.release(); 618 return newImpl.release();
540 } 619 }
541 620
542 PassRefPtr<StringImpl> StringImpl::lower() 621 // Do a slower implementation for cases that include non-ASCII characters.
543 { 622 UChar* data16;
544 // Note: This is a hot function in the Dromaeo benchmark, specifically the 623 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
545 // no-op code path up through the first 'return' statement. 624
546 625 bool error;
547 // First scan the string for uppercase and non-ASCII characters: 626 int32_t realLength =
627 Unicode::toLower(data16, length, characters16(), m_length, &error);
628 if (!error && realLength == length)
629 return newImpl.release();
630
631 newImpl = createUninitialized(realLength, data16);
632 Unicode::toLower(data16, realLength, characters16(), m_length, &error);
633 if (error)
634 return this;
635 return newImpl.release();
636 }
637
638 PassRefPtr<StringImpl> StringImpl::upper() {
639 // This function could be optimized for no-op cases the way lower() is,
640 // but in empirical testing, few actual calls to upper() are no-ops, so
641 // it wouldn't be worth the extra time for pre-scanning.
642
643 RELEASE_ASSERT(m_length <=
644 static_cast<unsigned>(numeric_limits<int32_t>::max()));
645 int32_t length = m_length;
646
647 if (is8Bit()) {
648 LChar* data8;
649 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
650
651 // Do a faster loop for the case where all the characters are ASCII.
652 LChar ored = 0;
653 for (int i = 0; i < length; ++i) {
654 LChar c = characters8()[i];
655 ored |= c;
656 data8[i] = toASCIIUpper(c);
657 }
658 if (!(ored & ~0x7F))
659 return newImpl.release();
660
661 // Do a slower implementation for cases that include non-ASCII Latin-1 chara cters.
662 int numberSharpSCharacters = 0;
663
664 // There are two special cases.
665 // 1. latin-1 characters when converted to upper case are 16 bit characters .
666 // 2. Lower case sharp-S converts to "SS" (two characters)
667 for (int32_t i = 0; i < length; ++i) {
668 LChar c = characters8()[i];
669 if (UNLIKELY(c == smallLetterSharpSCharacter))
670 ++numberSharpSCharacters;
671 UChar upper = static_cast<UChar>(Unicode::toUpper(c));
672 if (UNLIKELY(upper > 0xff)) {
673 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
674 goto upconvert;
675 }
676 data8[i] = static_cast<LChar>(upper);
677 }
678
679 if (!numberSharpSCharacters)
680 return newImpl.release();
681
682 // We have numberSSCharacters sharp-s characters, but none of the other spec ial characters.
683 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
684
685 LChar* dest = data8;
686
687 for (int32_t i = 0; i < length; ++i) {
688 LChar c = characters8()[i];
689 if (c == smallLetterSharpSCharacter) {
690 *dest++ = 'S';
691 *dest++ = 'S';
692 } else {
693 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
694 }
695 }
696
697 return newImpl.release();
698 }
699
700 upconvert:
701 RefPtr<StringImpl> upconverted = upconvertedString();
702 const UChar* source16 = upconverted->characters16();
703
704 UChar* data16;
705 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
706
707 // Do a faster loop for the case where all the characters are ASCII.
708 UChar ored = 0;
709 for (int i = 0; i < length; ++i) {
710 UChar c = source16[i];
711 ored |= c;
712 data16[i] = toASCIIUpper(c);
713 }
714 if (!(ored & ~0x7F))
715 return newImpl.release();
716
717 // Do a slower implementation for cases that include non-ASCII characters.
718 bool error;
719 int32_t realLength =
720 Unicode::toUpper(data16, length, source16, m_length, &error);
721 if (!error && realLength == length)
722 return newImpl;
723 newImpl = createUninitialized(realLength, data16);
724 Unicode::toUpper(data16, realLength, source16, m_length, &error);
725 if (error)
726 return this;
727 return newImpl.release();
728 }
729
730 static inline bool localeIdMatchesLang(const AtomicString& localeId,
731 const char* lang) {
732 size_t langLength = strlen(lang);
733 RELEASE_ASSERT(langLength >= 2 && langLength <= 3);
734 if (!localeId.impl() ||
735 !localeId.impl()->startsWithIgnoringCase(lang, langLength))
736 return false;
737 if (localeId.impl()->length() == langLength)
738 return true;
739 const UChar maybeDelimiter = (*localeId.impl())[langLength];
740 return maybeDelimiter == '-' || maybeDelimiter == '_' ||
741 maybeDelimiter == '@';
742 }
743
744 typedef int32_t (*icuCaseConverter)(UChar*,
745 int32_t,
746 const UChar*,
747 int32_t,
748 const char*,
749 UErrorCode*);
750
751 static PassRefPtr<StringImpl> caseConvert(const UChar* source16,
752 size_t length,
753 icuCaseConverter converter,
754 const char* locale,
755 StringImpl* originalString) {
756 UChar* data16;
757 size_t targetLength = length;
758 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);
759 do {
760 UErrorCode status = U_ZERO_ERROR;
761 targetLength =
762 converter(data16, targetLength, source16, length, locale, &status);
763 if (U_SUCCESS(status)) {
764 if (length > 0)
765 return output->substring(0, targetLength);
766 return output.release();
767 }
768 if (status != U_BUFFER_OVERFLOW_ERROR)
769 return originalString;
770 // Expand the buffer.
771 output = StringImpl::createUninitialized(targetLength, data16);
772 } while (true);
773 }
774
775 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) {
776 // Use the more-optimized code path most of the time.
777 // Only Turkic (tr and az) languages and Lithuanian requires
778 // locale-specific lowercasing rules. Even though CLDR has el-Lower,
779 // it's identical to the locale-agnostic lowercasing. Context-dependent
780 // handling of Greek capital sigma is built into the common lowercasing
781 // function in ICU.
782 const char* localeForConversion = 0;
783 if (localeIdMatchesLang(localeIdentifier, "tr") ||
784 localeIdMatchesLang(localeIdentifier, "az"))
785 localeForConversion = "tr";
786 else if (localeIdMatchesLang(localeIdentifier, "lt"))
787 localeForConversion = "lt";
788 else
789 return lower();
790
791 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
792 CRASH();
793 int length = m_length;
794
795 RefPtr<StringImpl> upconverted = upconvertedString();
796 const UChar* source16 = upconverted->characters16();
797 return caseConvert(source16, length, u_strToLower, localeForConversion, this);
798 }
799
800 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) {
801 // Use the more-optimized code path most of the time.
802 // Only Turkic (tr and az) languages and Greek require locale-specific
803 // lowercasing rules.
804 icu::UnicodeString transliteratorId;
805 const char* localeForConversion = 0;
806 if (localeIdMatchesLang(localeIdentifier, "tr") ||
807 localeIdMatchesLang(localeIdentifier, "az"))
808 localeForConversion = "tr";
809 else if (localeIdMatchesLang(localeIdentifier, "el"))
810 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper");
811 else if (localeIdMatchesLang(localeIdentifier, "lt"))
812 localeForConversion = "lt";
813 else
814 return upper();
815
816 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
817 CRASH();
818 int length = m_length;
819
820 RefPtr<StringImpl> upconverted = upconvertedString();
821 const UChar* source16 = upconverted->characters16();
822
823 if (localeForConversion)
824 return caseConvert(source16, length, u_strToUpper, localeForConversion,
825 this);
826
827 // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek.
828 UErrorCode status = U_ZERO_ERROR;
829 OwnPtr<icu::Transliterator> translit =
830 adoptPtr(icu::Transliterator::createInstance(transliteratorId,
831 UTRANS_FORWARD, status));
832 if (U_FAILURE(status))
833 return upper();
834
835 // target will be copy-on-write.
836 icu::UnicodeString target(false, source16, length);
837 translit->transliterate(target);
838
839 return create(target.getBuffer(), target.length());
840 }
841
842 PassRefPtr<StringImpl> StringImpl::fill(UChar character) {
843 if (!(character & ~0x7F)) {
844 LChar* data;
845 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
846 for (unsigned i = 0; i < m_length; ++i)
847 data[i] = static_cast<LChar>(character);
848 return newImpl.release();
849 }
850 UChar* data;
851 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
852 for (unsigned i = 0; i < m_length; ++i)
853 data[i] = character;
854 return newImpl.release();
855 }
856
857 PassRefPtr<StringImpl> StringImpl::foldCase() {
858 RELEASE_ASSERT(m_length <=
859 static_cast<unsigned>(numeric_limits<int32_t>::max()));
860 int32_t length = m_length;
861
862 if (is8Bit()) {
863 // Do a faster loop for the case where all the characters are ASCII.
864 LChar* data;
865 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
866 LChar ored = 0;
867
868 for (int32_t i = 0; i < length; ++i) {
869 LChar c = characters8()[i];
870 data[i] = toASCIILower(c);
871 ored |= c;
872 }
873
874 if (!(ored & ~0x7F))
875 return newImpl.release();
876
877 // Do a slower implementation for cases that include non-ASCII Latin-1 chara cters.
878 for (int32_t i = 0; i < length; ++i)
879 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
880
881 return newImpl.release();
882 }
883
884 // Do a faster loop for the case where all the characters are ASCII.
885 UChar* data;
886 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
887 UChar ored = 0;
888 for (int32_t i = 0; i < length; ++i) {
889 UChar c = characters16()[i];
890 ored |= c;
891 data[i] = toASCIILower(c);
892 }
893 if (!(ored & ~0x7F))
894 return newImpl.release();
895
896 // Do a slower implementation for cases that include non-ASCII characters.
897 bool error;
898 int32_t realLength =
899 Unicode::foldCase(data, length, characters16(), m_length, &error);
900 if (!error && realLength == length)
901 return newImpl.release();
902 newImpl = createUninitialized(realLength, data);
903 Unicode::foldCase(data, realLength, characters16(), m_length, &error);
904 if (error)
905 return this;
906 return newImpl.release();
907 }
908
909 template <class UCharPredicate>
910 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(
911 UCharPredicate predicate) {
912 if (!m_length)
913 return empty();
914
915 unsigned start = 0;
916 unsigned end = m_length - 1;
917
918 // skip white space from start
919 while (start <= end &&
920 predicate(is8Bit() ? characters8()[start] : characters16()[start]))
921 ++start;
922
923 // only white space
924 if (start > end)
925 return empty();
926
927 // skip white space from end
928 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
929 --end;
930
931 if (!start && end == m_length - 1)
932 return this;
933 if (is8Bit())
934 return create(characters8() + start, end + 1 - start);
935 return create(characters16() + start, end + 1 - start);
936 }
937
938 class UCharPredicate final {
939 STACK_ALLOCATED();
940
941 public:
942 inline UCharPredicate(CharacterMatchFunctionPtr function)
943 : m_function(function) {}
944
945 inline bool operator()(UChar ch) const { return m_function(ch); }
946
947 private:
948 const CharacterMatchFunctionPtr m_function;
949 };
950
951 class SpaceOrNewlinePredicate final {
952 STACK_ALLOCATED();
953
954 public:
955 inline bool operator()(UChar ch) const { return isSpaceOrNewline(ch); }
956 };
957
958 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() {
959 return stripMatchedCharacters(SpaceOrNewlinePredicate());
960 }
961
962 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(
963 IsWhiteSpaceFunctionPtr isWhiteSpace) {
964 return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
965 }
966
967 template <typename CharType>
968 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(
969 const CharType* characters,
970 CharacterMatchFunctionPtr findMatch) {
971 const CharType* from = characters;
972 const CharType* fromend = from + m_length;
973
974 // Assume the common case will not remove any characters
975 while (from != fromend && !findMatch(*from))
976 ++from;
977 if (from == fromend)
978 return this;
979
980 StringBuffer<CharType> data(m_length);
981 CharType* to = data.characters();
982 unsigned outc = from - characters;
983
984 if (outc)
985 memcpy(to, characters, outc * sizeof(CharType));
986
987 while (true) {
988 while (from != fromend && findMatch(*from))
989 ++from;
990 while (from != fromend && !findMatch(*from))
991 to[outc++] = *from++;
992 if (from == fromend)
993 break;
994 }
995
996 data.shrink(outc);
997
998 return data.release();
999 }
1000
1001 PassRefPtr<StringImpl> StringImpl::removeCharacters(
1002 CharacterMatchFunctionPtr findMatch) {
1003 if (is8Bit())
1004 return removeCharacters(characters8(), findMatch);
1005 return removeCharacters(characters16(), findMatch);
1006 }
1007
1008 template <typename CharType, class UCharPredicate>
1009 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(
1010 UCharPredicate predicate,
1011 StripBehavior stripBehavior) {
1012 StringBuffer<CharType> data(m_length);
1013
1014 const CharType* from = getCharacters<CharType>();
1015 const CharType* fromend = from + m_length;
1016 int outc = 0;
1017 bool changedToSpace = false;
1018
1019 CharType* to = data.characters();
1020
1021 if (stripBehavior == StripExtraWhiteSpace) {
1022 while (true) {
1023 while (from != fromend && predicate(*from)) {
1024 if (*from != ' ')
1025 changedToSpace = true;
1026 ++from;
1027 }
1028 while (from != fromend && !predicate(*from))
1029 to[outc++] = *from++;
1030 if (from != fromend)
1031 to[outc++] = ' ';
1032 else
1033 break;
1034 }
1035
1036 if (outc > 0 && to[outc - 1] == ' ')
1037 --outc;
1038 } else {
1039 for (; from != fromend; ++from) {
1040 if (predicate(*from)) {
1041 if (*from != ' ')
1042 changedToSpace = true;
1043 to[outc++] = ' ';
1044 } else {
1045 to[outc++] = *from;
1046 }
1047 }
1048 }
1049
1050 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
1051 return this;
1052
1053 data.shrink(outc);
1054
1055 return data.release();
1056 }
1057
1058 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(
1059 StripBehavior stripBehavior) {
1060 if (is8Bit())
1061 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(
1062 SpaceOrNewlinePredicate(), stripBehavior);
1063 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(
1064 SpaceOrNewlinePredicate(), stripBehavior);
1065 }
1066
1067 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(
1068 IsWhiteSpaceFunctionPtr isWhiteSpace,
1069 StripBehavior stripBehavior) {
1070 if (is8Bit())
1071 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(
1072 UCharPredicate(isWhiteSpace), stripBehavior);
1073 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(
1074 UCharPredicate(isWhiteSpace), stripBehavior);
1075 }
1076
1077 int StringImpl::toIntStrict(bool* ok, int base) {
1078 if (is8Bit())
1079 return charactersToIntStrict(characters8(), m_length, ok, base);
1080 return charactersToIntStrict(characters16(), m_length, ok, base);
1081 }
1082
1083 unsigned StringImpl::toUIntStrict(bool* ok, int base) {
1084 if (is8Bit())
1085 return charactersToUIntStrict(characters8(), m_length, ok, base);
1086 return charactersToUIntStrict(characters16(), m_length, ok, base);
1087 }
1088
1089 int64_t StringImpl::toInt64Strict(bool* ok, int base) {
1090 if (is8Bit())
1091 return charactersToInt64Strict(characters8(), m_length, ok, base);
1092 return charactersToInt64Strict(characters16(), m_length, ok, base);
1093 }
1094
1095 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) {
1096 if (is8Bit())
1097 return charactersToUInt64Strict(characters8(), m_length, ok, base);
1098 return charactersToUInt64Strict(characters16(), m_length, ok, base);
1099 }
1100
1101 int StringImpl::toInt(bool* ok) {
1102 if (is8Bit())
1103 return charactersToInt(characters8(), m_length, ok);
1104 return charactersToInt(characters16(), m_length, ok);
1105 }
1106
1107 unsigned StringImpl::toUInt(bool* ok) {
1108 if (is8Bit())
1109 return charactersToUInt(characters8(), m_length, ok);
1110 return charactersToUInt(characters16(), m_length, ok);
1111 }
1112
1113 int64_t StringImpl::toInt64(bool* ok) {
1114 if (is8Bit())
1115 return charactersToInt64(characters8(), m_length, ok);
1116 return charactersToInt64(characters16(), m_length, ok);
1117 }
1118
1119 uint64_t StringImpl::toUInt64(bool* ok) {
1120 if (is8Bit())
1121 return charactersToUInt64(characters8(), m_length, ok);
1122 return charactersToUInt64(characters16(), m_length, ok);
1123 }
1124
1125 double StringImpl::toDouble(bool* ok) {
1126 if (is8Bit())
1127 return charactersToDouble(characters8(), m_length, ok);
1128 return charactersToDouble(characters16(), m_length, ok);
1129 }
1130
1131 float StringImpl::toFloat(bool* ok) {
1132 if (is8Bit())
1133 return charactersToFloat(characters8(), m_length, ok);
1134 return charactersToFloat(characters16(), m_length, ok);
1135 }
1136
1137 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt
1138 const UChar StringImpl::latin1CaseFoldTable[256] = {
1139 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
1140 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011,
1141 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a,
1142 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
1143 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c,
1144 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035,
1145 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e,
1146 0x003f, 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
1147 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070,
1148 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079,
1149 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062,
1150 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b,
1151 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074,
1152 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d,
1153 0x007e, 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086,
1154 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
1155 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098,
1156 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1,
1157 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa,
1158 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3,
1159 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc,
1160 0x00bd, 0x00be, 0x00bf, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5,
1161 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee,
1162 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7,
1163 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, 0x00e0,
1164 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9,
1165 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2,
1166 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb,
1167 0x00fc, 0x00fd, 0x00fe, 0x00ff,
1168 };
1169
1170 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) {
1171 while (length--) {
1172 if (StringImpl::latin1CaseFoldTable[*a++] !=
1173 StringImpl::latin1CaseFoldTable[*b++])
1174 return false;
1175 }
1176 return true;
1177 }
1178
1179 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) {
1180 while (length--) {
1181 if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++])
1182 return false;
1183 }
1184 return true;
1185 }
1186
1187 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction,
1188 unsigned start) {
1189 if (is8Bit())
1190 return WTF::find(characters8(), m_length, matchFunction, start);
1191 return WTF::find(characters16(), m_length, matchFunction, start);
1192 }
1193
1194 size_t StringImpl::find(const LChar* matchString, unsigned index) {
1195 // Check for null or empty string to match against
1196 if (!matchString)
1197 return kNotFound;
1198 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1199 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1200 unsigned matchLength = matchStringLength;
1201 if (!matchLength)
1202 return min(index, length());
1203
1204 // Optimization 1: fast case for strings of length 1.
1205 if (matchLength == 1)
1206 return WTF::find(characters16(), length(), *matchString, index);
1207
1208 // Check index & matchLength are in range.
1209 if (index > length())
1210 return kNotFound;
1211 unsigned searchLength = length() - index;
1212 if (matchLength > searchLength)
1213 return kNotFound;
1214 // delta is the number of additional times to test; delta == 0 means test only once.
1215 unsigned delta = searchLength - matchLength;
1216
1217 const UChar* searchCharacters = characters16() + index;
1218
1219 // Optimization 2: keep a running hash of the strings,
1220 // only call equal if the hashes match.
1221 unsigned searchHash = 0;
1222 unsigned matchHash = 0;
1223 for (unsigned i = 0; i < matchLength; ++i) {
1224 searchHash += searchCharacters[i];
1225 matchHash += matchString[i];
1226 }
1227
1228 unsigned i = 0;
1229 // keep looping until we match
1230 while (searchHash != matchHash ||
1231 !equal(searchCharacters + i, matchString, matchLength)) {
1232 if (i == delta)
1233 return kNotFound;
1234 searchHash += searchCharacters[i + matchLength];
1235 searchHash -= searchCharacters[i];
1236 ++i;
1237 }
1238 return index + i;
1239 }
1240
1241 template <typename CharType>
1242 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters,
1243 const LChar* matchString,
1244 unsigned index,
1245 unsigned searchLength,
1246 unsigned matchLength) {
1247 // delta is the number of additional times to test; delta == 0 means test only once.
1248 unsigned delta = searchLength - matchLength;
1249
1250 unsigned i = 0;
1251 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
1252 if (i == delta)
1253 return kNotFound;
1254 ++i;
1255 }
1256 return index + i;
1257 }
1258
1259 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) {
1260 // Check for null or empty string to match against
1261 if (!matchString)
1262 return kNotFound;
1263 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1264 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1265 unsigned matchLength = matchStringLength;
1266 if (!matchLength)
1267 return min(index, length());
1268
1269 // Check index & matchLength are in range.
1270 if (index > length())
1271 return kNotFound;
1272 unsigned searchLength = length() - index;
1273 if (matchLength > searchLength)
1274 return kNotFound;
1275
1276 if (is8Bit())
1277 return findIgnoringCaseInternal(characters8() + index, matchString, index,
1278 searchLength, matchLength);
1279 return findIgnoringCaseInternal(characters16() + index, matchString, index,
1280 searchLength, matchLength);
1281 }
1282
1283 template <typename SearchCharacterType, typename MatchCharacterType>
1284 ALWAYS_INLINE static size_t findInternal(
1285 const SearchCharacterType* searchCharacters,
1286 const MatchCharacterType* matchCharacters,
1287 unsigned index,
1288 unsigned searchLength,
1289 unsigned matchLength) {
1290 // Optimization: keep a running hash of the strings,
1291 // only call equal() if the hashes match.
1292
1293 // delta is the number of additional times to test; delta == 0 means test only once.
1294 unsigned delta = searchLength - matchLength;
1295
1296 unsigned searchHash = 0;
1297 unsigned matchHash = 0;
1298
1299 for (unsigned i = 0; i < matchLength; ++i) {
1300 searchHash += searchCharacters[i];
1301 matchHash += matchCharacters[i];
1302 }
1303
1304 unsigned i = 0;
1305 // keep looping until we match
1306 while (searchHash != matchHash ||
1307 !equal(searchCharacters + i, matchCharacters, matchLength)) {
1308 if (i == delta)
1309 return kNotFound;
1310 searchHash += searchCharacters[i + matchLength];
1311 searchHash -= searchCharacters[i];
1312 ++i;
1313 }
1314 return index + i;
1315 }
1316
1317 size_t StringImpl::find(StringImpl* matchString) {
1318 // Check for null string to match against
1319 if (UNLIKELY(!matchString))
1320 return kNotFound;
1321 unsigned matchLength = matchString->length();
1322
1323 // Optimization 1: fast case for strings of length 1.
1324 if (matchLength == 1) {
548 if (is8Bit()) { 1325 if (is8Bit()) {
549 unsigned firstIndexToBeLowered = m_length; 1326 if (matchString->is8Bit())
550 for (unsigned i = 0; i < m_length; ++i) { 1327 return WTF::find(characters8(), length(),
551 LChar ch = characters8()[i]; 1328 matchString->characters8()[0]);
552 if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) { 1329 return WTF::find(characters8(), length(), matchString->characters16()[0]);
553 firstIndexToBeLowered = i; 1330 }
554 break; 1331 if (matchString->is8Bit())
555 } 1332 return WTF::find(characters16(), length(), matchString->characters8()[0]);
556 } 1333 return WTF::find(characters16(), length(), matchString->characters16()[0]);
557 1334 }
558 // Nothing to do if the string is all ASCII with no uppercase. 1335
559 if (firstIndexToBeLowered == m_length) 1336 // Check matchLength is in range.
560 return this; 1337 if (matchLength > length())
561 1338 return kNotFound;
562 LChar* data8; 1339
563 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); 1340 // Check for empty string to match against
564 memcpy(data8, characters8(), firstIndexToBeLowered); 1341 if (UNLIKELY(!matchLength))
565 1342 return 0;
566 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) { 1343
567 LChar ch = characters8()[i]; 1344 if (is8Bit()) {
568 data8[i] = UNLIKELY(ch & ~0x7F) 1345 if (matchString->is8Bit())
569 ? static_cast<LChar>(Unicode::toLower(ch)) : toASCIILower(ch); 1346 return findInternal(characters8(), matchString->characters8(), 0,
570 } 1347 length(), matchLength);
571 1348 return findInternal(characters8(), matchString->characters16(), 0, length(),
572 return newImpl.release(); 1349 matchLength);
573 } 1350 }
574 1351
575 bool noUpper = true; 1352 if (matchString->is8Bit())
576 UChar ored = 0; 1353 return findInternal(characters16(), matchString->characters8(), 0, length(),
577 1354 matchLength);
578 const UChar* end = characters16() + m_length; 1355
579 for (const UChar* chp = characters16(); chp != end; ++chp) { 1356 return findInternal(characters16(), matchString->characters16(), 0, length(),
580 if (UNLIKELY(isASCIIUpper(*chp))) 1357 matchLength);
581 noUpper = false; 1358 }
582 ored |= *chp; 1359
583 } 1360 size_t StringImpl::find(StringImpl* matchString, unsigned index) {
584 // Nothing to do if the string is all ASCII with no uppercase. 1361 // Check for null or empty string to match against
585 if (noUpper && !(ored & ~0x7F)) 1362 if (UNLIKELY(!matchString))
586 return this; 1363 return kNotFound;
587 1364
588 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma x())); 1365 unsigned matchLength = matchString->length();
589 int32_t length = m_length; 1366
590 1367 // Optimization 1: fast case for strings of length 1.
591 if (!(ored & ~0x7F)) { 1368 if (matchLength == 1) {
592 UChar* data16; 1369 if (is8Bit())
593 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 1370 return WTF::find(characters8(), length(), (*matchString)[0], index);
594 1371 return WTF::find(characters16(), length(), (*matchString)[0], index);
595 for (int32_t i = 0; i < length; ++i) { 1372 }
596 UChar c = characters16()[i]; 1373
597 data16[i] = toASCIILower(c); 1374 if (UNLIKELY(!matchLength))
598 } 1375 return min(index, length());
599 return newImpl.release(); 1376
600 } 1377 // Check index & matchLength are in range.
601 1378 if (index > length())
602 // Do a slower implementation for cases that include non-ASCII characters. 1379 return kNotFound;
603 UChar* data16; 1380 unsigned searchLength = length() - index;
604 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 1381 if (matchLength > searchLength)
605 1382 return kNotFound;
606 bool error; 1383
607 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_leng th, &error); 1384 if (is8Bit()) {
608 if (!error && realLength == length) 1385 if (matchString->is8Bit())
609 return newImpl.release(); 1386 return findInternal(characters8() + index, matchString->characters8(),
610 1387 index, searchLength, matchLength);
611 newImpl = createUninitialized(realLength, data16); 1388 return findInternal(characters8() + index, matchString->characters16(),
612 Unicode::toLower(data16, realLength, characters16(), m_length, &error); 1389 index, searchLength, matchLength);
613 if (error) 1390 }
614 return this; 1391
615 return newImpl.release(); 1392 if (matchString->is8Bit())
616 } 1393 return findInternal(characters16() + index, matchString->characters8(),
617 1394 index, searchLength, matchLength);
618 PassRefPtr<StringImpl> StringImpl::upper() 1395
619 { 1396 return findInternal(characters16() + index, matchString->characters16(),
620 // This function could be optimized for no-op cases the way lower() is, 1397 index, searchLength, matchLength);
621 // but in empirical testing, few actual calls to upper() are no-ops, so 1398 }
622 // it wouldn't be worth the extra time for pre-scanning. 1399
623 1400 template <typename SearchCharacterType, typename MatchCharacterType>
624 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma x())); 1401 ALWAYS_INLINE static size_t findIgnoringCaseInner(
625 int32_t length = m_length; 1402 const SearchCharacterType* searchCharacters,
626 1403 const MatchCharacterType* matchCharacters,
627 if (is8Bit()) { 1404 unsigned index,
628 LChar* data8; 1405 unsigned searchLength,
629 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); 1406 unsigned matchLength) {
630 1407 // delta is the number of additional times to test; delta == 0 means test only once.
631 // Do a faster loop for the case where all the characters are ASCII. 1408 unsigned delta = searchLength - matchLength;
632 LChar ored = 0; 1409
633 for (int i = 0; i < length; ++i) { 1410 unsigned i = 0;
634 LChar c = characters8()[i]; 1411 // keep looping until we match
635 ored |= c; 1412 while (
636 data8[i] = toASCIIUpper(c); 1413 !equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
637 } 1414 if (i == delta)
638 if (!(ored & ~0x7F)) 1415 return kNotFound;
639 return newImpl.release(); 1416 ++i;
640 1417 }
641 // Do a slower implementation for cases that include non-ASCII Latin-1 c haracters. 1418 return index + i;
642 int numberSharpSCharacters = 0; 1419 }
643 1420
644 // There are two special cases. 1421 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) {
645 // 1. latin-1 characters when converted to upper case are 16 bit charac ters. 1422 // Check for null or empty string to match against
646 // 2. Lower case sharp-S converts to "SS" (two characters) 1423 if (!matchString)
647 for (int32_t i = 0; i < length; ++i) { 1424 return kNotFound;
648 LChar c = characters8()[i]; 1425 unsigned matchLength = matchString->length();
649 if (UNLIKELY(c == smallLetterSharpSCharacter)) 1426 if (!matchLength)
650 ++numberSharpSCharacters; 1427 return min(index, length());
651 UChar upper = static_cast<UChar>(Unicode::toUpper(c)); 1428
652 if (UNLIKELY(upper > 0xff)) { 1429 // Check index & matchLength are in range.
653 // Since this upper-cased character does not fit in an 8-bit str ing, we need to take the 16-bit path. 1430 if (index > length())
654 goto upconvert; 1431 return kNotFound;
655 } 1432 unsigned searchLength = length() - index;
656 data8[i] = static_cast<LChar>(upper); 1433 if (matchLength > searchLength)
657 } 1434 return kNotFound;
658 1435
659 if (!numberSharpSCharacters) 1436 if (is8Bit()) {
660 return newImpl.release(); 1437 if (matchString->is8Bit())
661 1438 return findIgnoringCaseInner(characters8() + index,
662 // We have numberSSCharacters sharp-s characters, but none of the other special characters. 1439 matchString->characters8(), index,
663 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); 1440 searchLength, matchLength);
664 1441 return findIgnoringCaseInner(characters8() + index,
665 LChar* dest = data8; 1442 matchString->characters16(), index,
666 1443 searchLength, matchLength);
667 for (int32_t i = 0; i < length; ++i) { 1444 }
668 LChar c = characters8()[i]; 1445
669 if (c == smallLetterSharpSCharacter) { 1446 if (matchString->is8Bit())
670 *dest++ = 'S'; 1447 return findIgnoringCaseInner(characters16() + index,
671 *dest++ = 'S'; 1448 matchString->characters8(), index,
672 } else { 1449 searchLength, matchLength);
673 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); 1450
674 } 1451 return findIgnoringCaseInner(characters16() + index,
675 } 1452 matchString->characters16(), index, searchLength,
676 1453 matchLength);
677 return newImpl.release(); 1454 }
678 } 1455
679 1456 template <typename SearchCharacterType, typename MatchCharacterType>
680 upconvert: 1457 ALWAYS_INLINE static size_t findIgnoringASCIICaseInner(
681 RefPtr<StringImpl> upconverted = upconvertedString(); 1458 const SearchCharacterType* searchCharacters,
682 const UChar* source16 = upconverted->characters16(); 1459 const MatchCharacterType* matchCharacters,
683 1460 unsigned index,
684 UChar* data16; 1461 unsigned searchLength,
685 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 1462 unsigned matchLength) {
686 1463 // delta is the number of additional times to test; delta == 0 means test only once.
687 // Do a faster loop for the case where all the characters are ASCII. 1464 unsigned delta = searchLength - matchLength;
688 UChar ored = 0; 1465
689 for (int i = 0; i < length; ++i) { 1466 unsigned i = 0;
690 UChar c = source16[i]; 1467 // keep looping until we match
691 ored |= c; 1468 while (!equalIgnoringASCIICase(searchCharacters + i, matchCharacters,
692 data16[i] = toASCIIUpper(c); 1469 matchLength)) {
693 } 1470 if (i == delta)
694 if (!(ored & ~0x7F)) 1471 return kNotFound;
695 return newImpl.release(); 1472 ++i;
696 1473 }
697 // Do a slower implementation for cases that include non-ASCII characters. 1474 return index + i;
698 bool error; 1475 }
699 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &e rror); 1476
700 if (!error && realLength == length) 1477 size_t StringImpl::findIgnoringASCIICase(StringImpl* matchString,
701 return newImpl; 1478 unsigned index) {
702 newImpl = createUninitialized(realLength, data16); 1479 // Check for null or empty string to match against
703 Unicode::toUpper(data16, realLength, source16, m_length, &error); 1480 if (!matchString)
704 if (error) 1481 return kNotFound;
705 return this; 1482 unsigned matchLength = matchString->length();
706 return newImpl.release(); 1483 if (!matchLength)
707 } 1484 return min(index, length());
708 1485
709 static inline bool localeIdMatchesLang(const AtomicString& localeId, const char* lang) 1486 // Check index & matchLength are in range.
710 { 1487 if (index > length())
711 size_t langLength = strlen(lang); 1488 return kNotFound;
712 RELEASE_ASSERT(langLength >= 2 && langLength <= 3); 1489 unsigned searchLength = length() - index;
713 if (!localeId.impl() || !localeId.impl()->startsWithIgnoringCase(lang, langL ength)) 1490 if (matchLength > searchLength)
714 return false; 1491 return kNotFound;
715 if (localeId.impl()->length() == langLength) 1492
716 return true; 1493 if (is8Bit()) {
717 const UChar maybeDelimiter = (*localeId.impl())[langLength]; 1494 const LChar* searchStart = characters8() + index;
718 return maybeDelimiter == '-' || maybeDelimiter == '_' || maybeDelimiter == ' @'; 1495 if (matchString->is8Bit())
719 } 1496 return findIgnoringASCIICaseInner(searchStart, matchString->characters8(),
720 1497 index, searchLength, matchLength);
721 typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, cons t char*, UErrorCode*); 1498 return findIgnoringASCIICaseInner(searchStart, matchString->characters16(),
722 1499 index, searchLength, matchLength);
723 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length, icuCaseConverter converter, const char* locale, StringImpl* originalString) 1500 }
724 { 1501
725 UChar* data16; 1502 const UChar* searchStart = characters16() + index;
726 size_t targetLength = length; 1503 if (matchString->is8Bit())
727 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16); 1504 return findIgnoringASCIICaseInner(searchStart, matchString->characters8(),
728 do { 1505 index, searchLength, matchLength);
729 UErrorCode status = U_ZERO_ERROR; 1506 return findIgnoringASCIICaseInner(searchStart, matchString->characters16(),
730 targetLength = converter(data16, targetLength, source16, length, locale, &status); 1507 index, searchLength, matchLength);
731 if (U_SUCCESS(status)) { 1508 }
732 if (length > 0) 1509
733 return output->substring(0, targetLength); 1510 size_t StringImpl::findNextLineStart(unsigned index) {
734 return output.release(); 1511 if (is8Bit())
735 } 1512 return WTF::findNextLineStart(characters8(), m_length, index);
736 if (status != U_BUFFER_OVERFLOW_ERROR) 1513 return WTF::findNextLineStart(characters16(), m_length, index);
737 return originalString; 1514 }
738 // Expand the buffer. 1515
739 output = StringImpl::createUninitialized(targetLength, data16); 1516 size_t StringImpl::count(LChar c) const {
740 } while (true); 1517 int count = 0;
741 } 1518 if (is8Bit()) {
742 1519 for (size_t i = 0; i < m_length; ++i)
743 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) 1520 count += characters8()[i] == c;
744 { 1521 } else {
745 // Use the more-optimized code path most of the time. 1522 for (size_t i = 0; i < m_length; ++i)
746 // Only Turkic (tr and az) languages and Lithuanian requires 1523 count += characters16()[i] == c;
747 // locale-specific lowercasing rules. Even though CLDR has el-Lower, 1524 }
748 // it's identical to the locale-agnostic lowercasing. Context-dependent 1525 return count;
749 // handling of Greek capital sigma is built into the common lowercasing 1526 }
750 // function in ICU. 1527
751 const char* localeForConversion = 0; 1528 size_t StringImpl::reverseFind(UChar c, unsigned index) {
752 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(local eIdentifier, "az")) 1529 if (is8Bit())
753 localeForConversion = "tr"; 1530 return WTF::reverseFind(characters8(), m_length, c, index);
754 else if (localeIdMatchesLang(localeIdentifier, "lt")) 1531 return WTF::reverseFind(characters16(), m_length, c, index);
755 localeForConversion = "lt"; 1532 }
756 else 1533
757 return lower(); 1534 template <typename SearchCharacterType, typename MatchCharacterType>
758 1535 ALWAYS_INLINE static size_t reverseFindInner(
759 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) 1536 const SearchCharacterType* searchCharacters,
760 CRASH(); 1537 const MatchCharacterType* matchCharacters,
761 int length = m_length; 1538 unsigned index,
762 1539 unsigned length,
763 RefPtr<StringImpl> upconverted = upconvertedString(); 1540 unsigned matchLength) {
764 const UChar* source16 = upconverted->characters16(); 1541 // Optimization: keep a running hash of the strings,
765 return caseConvert(source16, length, u_strToLower, localeForConversion, this ); 1542 // only call equal if the hashes match.
766 } 1543
767 1544 // delta is the number of additional times to test; delta == 0 means test only once.
768 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) 1545 unsigned delta = min(index, length - matchLength);
769 { 1546
770 // Use the more-optimized code path most of the time. 1547 unsigned searchHash = 0;
771 // Only Turkic (tr and az) languages and Greek require locale-specific 1548 unsigned matchHash = 0;
772 // lowercasing rules. 1549 for (unsigned i = 0; i < matchLength; ++i) {
773 icu::UnicodeString transliteratorId; 1550 searchHash += searchCharacters[delta + i];
774 const char* localeForConversion = 0; 1551 matchHash += matchCharacters[i];
775 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(local eIdentifier, "az")) 1552 }
776 localeForConversion = "tr"; 1553
777 else if (localeIdMatchesLang(localeIdentifier, "el")) 1554 // keep looping until we match
778 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper"); 1555 while (searchHash != matchHash ||
779 else if (localeIdMatchesLang(localeIdentifier, "lt")) 1556 !equal(searchCharacters + delta, matchCharacters, matchLength)) {
780 localeForConversion = "lt"; 1557 if (!delta)
781 else 1558 return kNotFound;
782 return upper(); 1559 --delta;
783 1560 searchHash -= searchCharacters[delta + matchLength];
784 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) 1561 searchHash += searchCharacters[delta];
785 CRASH(); 1562 }
786 int length = m_length; 1563 return delta;
787 1564 }
788 RefPtr<StringImpl> upconverted = upconvertedString(); 1565
789 const UChar* source16 = upconverted->characters16(); 1566 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) {
790 1567 // Check for null or empty string to match against
791 if (localeForConversion) 1568 if (!matchString)
792 return caseConvert(source16, length, u_strToUpper, localeForConversion, this); 1569 return kNotFound;
793 1570 unsigned matchLength = matchString->length();
794 // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek . 1571 unsigned ourLength = length();
795 UErrorCode status = U_ZERO_ERROR; 1572 if (!matchLength)
796 OwnPtr<icu::Transliterator> translit = 1573 return min(index, ourLength);
797 adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FO RWARD, status)); 1574
798 if (U_FAILURE(status)) 1575 // Optimization 1: fast case for strings of length 1.
799 return upper(); 1576 if (matchLength == 1) {
800 1577 if (is8Bit())
801 // target will be copy-on-write. 1578 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0],
802 icu::UnicodeString target(false, source16, length); 1579 index);
803 translit->transliterate(target); 1580 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0],
804 1581 index);
805 return create(target.getBuffer(), target.length()); 1582 }
806 } 1583
807 1584 // Check index & matchLength are in range.
808 PassRefPtr<StringImpl> StringImpl::fill(UChar character) 1585 if (matchLength > ourLength)
809 { 1586 return kNotFound;
810 if (!(character & ~0x7F)) { 1587
811 LChar* data; 1588 if (is8Bit()) {
812 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 1589 if (matchString->is8Bit())
813 for (unsigned i = 0; i < m_length; ++i) 1590 return reverseFindInner(characters8(), matchString->characters8(), index,
814 data[i] = static_cast<LChar>(character); 1591 ourLength, matchLength);
815 return newImpl.release(); 1592 return reverseFindInner(characters8(), matchString->characters16(), index,
816 } 1593 ourLength, matchLength);
1594 }
1595
1596 if (matchString->is8Bit())
1597 return reverseFindInner(characters16(), matchString->characters8(), index,
1598 ourLength, matchLength);
1599
1600 return reverseFindInner(characters16(), matchString->characters16(), index,
1601 ourLength, matchLength);
1602 }
1603
1604 ALWAYS_INLINE static bool equalSubstring(const StringImpl* stringImpl,
1605 unsigned startOffset,
1606 const LChar* matchString,
1607 unsigned matchLength) {
1608 ASSERT(stringImpl);
1609 ASSERT(matchLength <= stringImpl->length());
1610 ASSERT(startOffset + matchLength <= stringImpl->length());
1611
1612 if (stringImpl->is8Bit())
1613 return equal(stringImpl->characters8() + startOffset, matchString,
1614 matchLength);
1615 return equal(stringImpl->characters16() + startOffset, matchString,
1616 matchLength);
1617 }
1618
1619 bool StringImpl::startsWith(UChar character) const {
1620 return m_length && (*this)[0] == character;
1621 }
1622
1623 bool StringImpl::startsWith(const char* prefixString,
1624 unsigned prefixLength) const {
1625 ASSERT(prefixLength);
1626 if (prefixLength > length())
1627 return false;
1628 return equalSubstring(this, 0, reinterpret_cast<const LChar*>(prefixString),
1629 prefixLength);
1630 }
1631
1632 ALWAYS_INLINE static bool equalSubstring(const StringImpl* stringImpl,
1633 unsigned startOffset,
1634 const StringImpl* matchString) {
1635 ASSERT(stringImpl);
1636 ASSERT(matchString);
1637 ASSERT(matchString->length() <= stringImpl->length());
1638 ASSERT(startOffset + matchString->length() <= stringImpl->length());
1639
1640 unsigned matchLength = matchString->length();
1641 if (matchString->is8Bit())
1642 return equalSubstring(stringImpl, startOffset, matchString->characters8(),
1643 matchLength);
1644 if (stringImpl->is8Bit())
1645 return equal(stringImpl->characters8() + startOffset,
1646 matchString->characters16(), matchLength);
1647 return equal(stringImpl->characters16() + startOffset,
1648 matchString->characters16(), matchLength);
1649 }
1650
1651 bool StringImpl::startsWith(const StringImpl* prefix) const {
1652 ASSERT(prefix);
1653 if (prefix->length() > length())
1654 return false;
1655 return equalSubstring(this, 0, prefix);
1656 }
1657
1658 ALWAYS_INLINE static bool equalSubstringIgnoringCase(
1659 const StringImpl* stringImpl,
1660 unsigned startOffset,
1661 const LChar* matchString,
1662 unsigned matchLength) {
1663 ASSERT(stringImpl);
1664 ASSERT(matchLength <= stringImpl->length());
1665 ASSERT(startOffset + matchLength <= stringImpl->length());
1666
1667 if (stringImpl->is8Bit())
1668 return equalIgnoringCase(stringImpl->characters8() + startOffset,
1669 matchString, matchLength);
1670 return equalIgnoringCase(stringImpl->characters16() + startOffset,
1671 matchString, matchLength);
1672 }
1673
1674 bool StringImpl::startsWithIgnoringCase(const char* prefixString,
1675 unsigned prefixLength) const {
1676 ASSERT(prefixLength);
1677 if (prefixLength > length())
1678 return false;
1679 return equalSubstringIgnoringCase(
1680 this, 0, reinterpret_cast<const LChar*>(prefixString), prefixLength);
1681 }
1682
1683 ALWAYS_INLINE static bool equalSubstringIgnoringCase(
1684 const StringImpl* stringImpl,
1685 unsigned startOffset,
1686 const StringImpl* matchString) {
1687 ASSERT(stringImpl);
1688 ASSERT(matchString);
1689 ASSERT(matchString->length() <= stringImpl->length());
1690 ASSERT(startOffset + matchString->length() <= stringImpl->length());
1691
1692 unsigned matchLength = matchString->length();
1693 if (matchString->is8Bit())
1694 return equalSubstringIgnoringCase(stringImpl, startOffset,
1695 matchString->characters8(), matchLength);
1696 if (stringImpl->is8Bit())
1697 return equalIgnoringCase(stringImpl->characters8() + startOffset,
1698 matchString->characters16(), matchLength);
1699 return equalIgnoringCase(stringImpl->characters16() + startOffset,
1700 matchString->characters16(), matchLength);
1701 }
1702
1703 bool StringImpl::startsWithIgnoringCase(const StringImpl* prefix) const {
1704 ASSERT(prefix);
1705 if (prefix->length() > length())
1706 return false;
1707 return equalSubstringIgnoringCase(this, 0, prefix);
1708 }
1709
1710 ALWAYS_INLINE static bool equalSubstringIgnoringASCIICase(
1711 const StringImpl* stringImpl,
1712 unsigned startOffset,
1713 const LChar* matchString,
1714 unsigned matchLength) {
1715 ASSERT(stringImpl);
1716 ASSERT(matchLength <= stringImpl->length());
1717 ASSERT(startOffset + matchLength <= stringImpl->length());
1718
1719 if (stringImpl->is8Bit())
1720 return equalIgnoringASCIICase(stringImpl->characters8() + startOffset,
1721 matchString, matchLength);
1722 return equalIgnoringASCIICase(stringImpl->characters16() + startOffset,
1723 matchString, matchLength);
1724 }
1725
1726 bool StringImpl::startsWithIgnoringASCIICase(const char* prefixString,
1727 unsigned prefixLength) const {
1728 ASSERT(prefixLength);
1729 if (prefixLength > length())
1730 return false;
1731 return equalSubstringIgnoringASCIICase(
1732 this, 0, reinterpret_cast<const LChar*>(prefixString), prefixLength);
1733 }
1734
1735 ALWAYS_INLINE static bool equalSubstringIgnoringASCIICase(
1736 const StringImpl* stringImpl,
1737 unsigned startOffset,
1738 const StringImpl* matchString) {
1739 ASSERT(stringImpl);
1740 ASSERT(matchString);
1741 ASSERT(matchString->length() <= stringImpl->length());
1742 ASSERT(startOffset + matchString->length() <= stringImpl->length());
1743
1744 unsigned matchLength = matchString->length();
1745 if (matchString->is8Bit())
1746 return equalSubstringIgnoringASCIICase(
1747 stringImpl, startOffset, matchString->characters8(), matchLength);
1748 if (stringImpl->is8Bit())
1749 return equalIgnoringASCIICase(stringImpl->characters8() + startOffset,
1750 matchString->characters16(), matchLength);
1751 return equalIgnoringASCIICase(stringImpl->characters16() + startOffset,
1752 matchString->characters16(), matchLength);
1753 }
1754
1755 bool StringImpl::startsWithIgnoringASCIICase(const StringImpl* prefix) const {
1756 ASSERT(prefix);
1757 if (prefix->length() > length())
1758 return false;
1759 return equalSubstringIgnoringASCIICase(this, 0, prefix);
1760 }
1761
1762 bool StringImpl::endsWith(UChar character) const {
1763 return m_length && (*this)[m_length - 1] == character;
1764 }
1765
1766 bool StringImpl::endsWith(const char* suffixString,
1767 unsigned suffixLength) const {
1768 ASSERT(suffixLength);
1769 if (suffixLength > length())
1770 return false;
1771 return equalSubstring(this, length() - suffixLength,
1772 reinterpret_cast<const LChar*>(suffixString),
1773 suffixLength);
1774 }
1775
1776 bool StringImpl::endsWith(const StringImpl* suffix) const {
1777 ASSERT(suffix);
1778 unsigned suffixLength = suffix->length();
1779 if (suffixLength > length())
1780 return false;
1781 return equalSubstring(this, length() - suffixLength, suffix);
1782 }
1783
1784 bool StringImpl::endsWithIgnoringCase(const char* suffixString,
1785 unsigned suffixLength) const {
1786 ASSERT(suffixLength);
1787 if (suffixLength > length())
1788 return false;
1789 return equalSubstringIgnoringCase(
1790 this, length() - suffixLength,
1791 reinterpret_cast<const LChar*>(suffixString), suffixLength);
1792 }
1793
1794 bool StringImpl::endsWithIgnoringCase(const StringImpl* suffix) const {
1795 ASSERT(suffix);
1796 unsigned suffixLength = suffix->length();
1797 if (suffixLength > length())
1798 return false;
1799 return equalSubstringIgnoringCase(this, length() - suffixLength, suffix);
1800 }
1801
1802 bool StringImpl::endsWithIgnoringASCIICase(const char* suffixString,
1803 unsigned suffixLength) const {
1804 ASSERT(suffixLength);
1805 if (suffixLength > length())
1806 return false;
1807 return equalSubstringIgnoringASCIICase(
1808 this, length() - suffixLength,
1809 reinterpret_cast<const LChar*>(suffixString), suffixLength);
1810 }
1811
1812 bool StringImpl::endsWithIgnoringASCIICase(const StringImpl* suffix) const {
1813 ASSERT(suffix);
1814 unsigned suffixLength = suffix->length();
1815 if (suffixLength > length())
1816 return false;
1817 return equalSubstringIgnoringASCIICase(this, length() - suffixLength, suffix);
1818 }
1819
1820 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) {
1821 if (oldC == newC)
1822 return this;
1823
1824 if (find(oldC) == kNotFound)
1825 return this;
1826
1827 unsigned i;
1828 if (is8Bit()) {
1829 if (newC <= 0xff) {
1830 LChar* data;
1831 LChar oldChar = static_cast<LChar>(oldC);
1832 LChar newChar = static_cast<LChar>(newC);
1833
1834 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1835
1836 for (i = 0; i != m_length; ++i) {
1837 LChar ch = characters8()[i];
1838 if (ch == oldChar)
1839 ch = newChar;
1840 data[i] = ch;
1841 }
1842 return newImpl.release();
1843 }
1844
1845 // There is the possibility we need to up convert from 8 to 16 bit,
1846 // create a 16 bit string for the result.
817 UChar* data; 1847 UChar* data;
818 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 1848 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
819 for (unsigned i = 0; i < m_length; ++i) 1849
820 data[i] = character; 1850 for (i = 0; i != m_length; ++i) {
1851 UChar ch = characters8()[i];
1852 if (ch == oldC)
1853 ch = newC;
1854 data[i] = ch;
1855 }
1856
821 return newImpl.release(); 1857 return newImpl.release();
822 } 1858 }
823 1859
824 PassRefPtr<StringImpl> StringImpl::foldCase() 1860 UChar* data;
825 { 1861 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
826 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma x())); 1862
827 int32_t length = m_length; 1863 for (i = 0; i != m_length; ++i) {
828 1864 UChar ch = characters16()[i];
829 if (is8Bit()) { 1865 if (ch == oldC)
830 // Do a faster loop for the case where all the characters are ASCII. 1866 ch = newC;
831 LChar* data; 1867 data[i] = ch;
832 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data); 1868 }
833 LChar ored = 0; 1869 return newImpl.release();
834 1870 }
835 for (int32_t i = 0; i < length; ++i) { 1871
836 LChar c = characters8()[i]; 1872 PassRefPtr<StringImpl> StringImpl::replace(unsigned position,
837 data[i] = toASCIILower(c); 1873 unsigned lengthToReplace,
838 ored |= c; 1874 StringImpl* str) {
839 } 1875 position = min(position, length());
840 1876 lengthToReplace = min(lengthToReplace, length() - position);
841 if (!(ored & ~0x7F)) 1877 unsigned lengthToInsert = str ? str->length() : 0;
842 return newImpl.release(); 1878 if (!lengthToReplace && !lengthToInsert)
843 1879 return this;
844 // Do a slower implementation for cases that include non-ASCII Latin-1 c haracters. 1880
845 for (int32_t i = 0; i < length; ++i) 1881 RELEASE_ASSERT((length() - lengthToReplace) <
846 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); 1882 (numeric_limits<unsigned>::max() - lengthToInsert));
847 1883
848 return newImpl.release(); 1884 if (is8Bit() && (!str || str->is8Bit())) {
849 } 1885 LChar* data;
850
851 // Do a faster loop for the case where all the characters are ASCII.
852 UChar* data;
853 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
854 UChar ored = 0;
855 for (int32_t i = 0; i < length; ++i) {
856 UChar c = characters16()[i];
857 ored |= c;
858 data[i] = toASCIILower(c);
859 }
860 if (!(ored & ~0x7F))
861 return newImpl.release();
862
863 // Do a slower implementation for cases that include non-ASCII characters.
864 bool error;
865 int32_t realLength = Unicode::foldCase(data, length, characters16(), m_lengt h, &error);
866 if (!error && realLength == length)
867 return newImpl.release();
868 newImpl = createUninitialized(realLength, data);
869 Unicode::foldCase(data, realLength, characters16(), m_length, &error);
870 if (error)
871 return this;
872 return newImpl.release();
873 }
874
875 template <class UCharPredicate>
876 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
877 {
878 if (!m_length)
879 return empty();
880
881 unsigned start = 0;
882 unsigned end = m_length - 1;
883
884 // skip white space from start
885 while (start <= end && predicate(is8Bit() ? characters8()[start] : character s16()[start]))
886 ++start;
887
888 // only white space
889 if (start > end)
890 return empty();
891
892 // skip white space from end
893 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]) )
894 --end;
895
896 if (!start && end == m_length - 1)
897 return this;
898 if (is8Bit())
899 return create(characters8() + start, end + 1 - start);
900 return create(characters16() + start, end + 1 - start);
901 }
902
903 class UCharPredicate final {
904 STACK_ALLOCATED();
905 public:
906 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(functi on) { }
907
908 inline bool operator()(UChar ch) const
909 {
910 return m_function(ch);
911 }
912
913 private:
914 const CharacterMatchFunctionPtr m_function;
915 };
916
917 class SpaceOrNewlinePredicate final {
918 STACK_ALLOCATED();
919 public:
920 inline bool operator()(UChar ch) const
921 {
922 return isSpaceOrNewline(ch);
923 }
924 };
925
926 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
927 {
928 return stripMatchedCharacters(SpaceOrNewlinePredicate());
929 }
930
931 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhi teSpace)
932 {
933 return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
934 }
935
936 template <typename CharType>
937 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType * characters, CharacterMatchFunctionPtr findMatch)
938 {
939 const CharType* from = characters;
940 const CharType* fromend = from + m_length;
941
942 // Assume the common case will not remove any characters
943 while (from != fromend && !findMatch(*from))
944 ++from;
945 if (from == fromend)
946 return this;
947
948 StringBuffer<CharType> data(m_length);
949 CharType* to = data.characters();
950 unsigned outc = from - characters;
951
952 if (outc)
953 memcpy(to, characters, outc * sizeof(CharType));
954
955 while (true) {
956 while (from != fromend && findMatch(*from))
957 ++from;
958 while (from != fromend && !findMatch(*from))
959 to[outc++] = *from++;
960 if (from == fromend)
961 break;
962 }
963
964 data.shrink(outc);
965
966 return data.release();
967 }
968
969 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr fi ndMatch)
970 {
971 if (is8Bit())
972 return removeCharacters(characters8(), findMatch);
973 return removeCharacters(characters16(), findMatch);
974 }
975
976 template <typename CharType, class UCharPredicate>
977 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UChar Predicate predicate, StripBehavior stripBehavior)
978 {
979 StringBuffer<CharType> data(m_length);
980
981 const CharType* from = getCharacters<CharType>();
982 const CharType* fromend = from + m_length;
983 int outc = 0;
984 bool changedToSpace = false;
985
986 CharType* to = data.characters();
987
988 if (stripBehavior == StripExtraWhiteSpace) {
989 while (true) {
990 while (from != fromend && predicate(*from)) {
991 if (*from != ' ')
992 changedToSpace = true;
993 ++from;
994 }
995 while (from != fromend && !predicate(*from))
996 to[outc++] = *from++;
997 if (from != fromend)
998 to[outc++] = ' ';
999 else
1000 break;
1001 }
1002
1003 if (outc > 0 && to[outc - 1] == ' ')
1004 --outc;
1005 } else {
1006 for (; from != fromend; ++from) {
1007 if (predicate(*from)) {
1008 if (*from != ' ')
1009 changedToSpace = true;
1010 to[outc++] = ' ';
1011 } else {
1012 to[outc++] = *from;
1013 }
1014 }
1015 }
1016
1017 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
1018 return this;
1019
1020 data.shrink(outc);
1021
1022 return data.release();
1023 }
1024
1025 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavio r)
1026 {
1027 if (is8Bit())
1028 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlin ePredicate(), stripBehavior);
1029 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePre dicate(), stripBehavior);
1030 }
1031
1032 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr is WhiteSpace, StripBehavior stripBehavior)
1033 {
1034 if (is8Bit())
1035 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicat e(isWhiteSpace), stripBehavior);
1036 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(is WhiteSpace), stripBehavior);
1037 }
1038
1039 int StringImpl::toIntStrict(bool* ok, int base)
1040 {
1041 if (is8Bit())
1042 return charactersToIntStrict(characters8(), m_length, ok, base);
1043 return charactersToIntStrict(characters16(), m_length, ok, base);
1044 }
1045
1046 unsigned StringImpl::toUIntStrict(bool* ok, int base)
1047 {
1048 if (is8Bit())
1049 return charactersToUIntStrict(characters8(), m_length, ok, base);
1050 return charactersToUIntStrict(characters16(), m_length, ok, base);
1051 }
1052
1053 int64_t StringImpl::toInt64Strict(bool* ok, int base)
1054 {
1055 if (is8Bit())
1056 return charactersToInt64Strict(characters8(), m_length, ok, base);
1057 return charactersToInt64Strict(characters16(), m_length, ok, base);
1058 }
1059
1060 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
1061 {
1062 if (is8Bit())
1063 return charactersToUInt64Strict(characters8(), m_length, ok, base);
1064 return charactersToUInt64Strict(characters16(), m_length, ok, base);
1065 }
1066
1067 int StringImpl::toInt(bool* ok)
1068 {
1069 if (is8Bit())
1070 return charactersToInt(characters8(), m_length, ok);
1071 return charactersToInt(characters16(), m_length, ok);
1072 }
1073
1074 unsigned StringImpl::toUInt(bool* ok)
1075 {
1076 if (is8Bit())
1077 return charactersToUInt(characters8(), m_length, ok);
1078 return charactersToUInt(characters16(), m_length, ok);
1079 }
1080
1081 int64_t StringImpl::toInt64(bool* ok)
1082 {
1083 if (is8Bit())
1084 return charactersToInt64(characters8(), m_length, ok);
1085 return charactersToInt64(characters16(), m_length, ok);
1086 }
1087
1088 uint64_t StringImpl::toUInt64(bool* ok)
1089 {
1090 if (is8Bit())
1091 return charactersToUInt64(characters8(), m_length, ok);
1092 return charactersToUInt64(characters16(), m_length, ok);
1093 }
1094
1095 double StringImpl::toDouble(bool* ok)
1096 {
1097 if (is8Bit())
1098 return charactersToDouble(characters8(), m_length, ok);
1099 return charactersToDouble(characters16(), m_length, ok);
1100 }
1101
1102 float StringImpl::toFloat(bool* ok)
1103 {
1104 if (is8Bit())
1105 return charactersToFloat(characters8(), m_length, ok);
1106 return charactersToFloat(characters16(), m_length, ok);
1107 }
1108
1109 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt
1110 const UChar StringImpl::latin1CaseFoldTable[256] = {
1111 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x00 09, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
1112 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x00 19, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
1113 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x00 29, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
1114 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x00 39, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
1115 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x00 69, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
1116 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x00 79, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
1117 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x00 69, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
1118 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x00 79, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
1119 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x00 89, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
1120 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x00 99, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
1121 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00 a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
1122 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00 b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
1123 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00 e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
1124 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, 0x00f8, 0x00 f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df,
1125 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00 e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
1126 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00 f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
1127 };
1128
1129 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
1130 {
1131 while (length--) {
1132 if (StringImpl::latin1CaseFoldTable[*a++] != StringImpl::latin1CaseFoldT able[*b++])
1133 return false;
1134 }
1135 return true;
1136 }
1137
1138 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
1139 {
1140 while (length--) {
1141 if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++])
1142 return false;
1143 }
1144 return true;
1145 }
1146
1147 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
1148 {
1149 if (is8Bit())
1150 return WTF::find(characters8(), m_length, matchFunction, start);
1151 return WTF::find(characters16(), m_length, matchFunction, start);
1152 }
1153
1154 size_t StringImpl::find(const LChar* matchString, unsigned index)
1155 {
1156 // Check for null or empty string to match against
1157 if (!matchString)
1158 return kNotFound;
1159 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString) );
1160 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1161 unsigned matchLength = matchStringLength;
1162 if (!matchLength)
1163 return min(index, length());
1164
1165 // Optimization 1: fast case for strings of length 1.
1166 if (matchLength == 1)
1167 return WTF::find(characters16(), length(), *matchString, index);
1168
1169 // Check index & matchLength are in range.
1170 if (index > length())
1171 return kNotFound;
1172 unsigned searchLength = length() - index;
1173 if (matchLength > searchLength)
1174 return kNotFound;
1175 // delta is the number of additional times to test; delta == 0 means test on ly once.
1176 unsigned delta = searchLength - matchLength;
1177
1178 const UChar* searchCharacters = characters16() + index;
1179
1180 // Optimization 2: keep a running hash of the strings,
1181 // only call equal if the hashes match.
1182 unsigned searchHash = 0;
1183 unsigned matchHash = 0;
1184 for (unsigned i = 0; i < matchLength; ++i) {
1185 searchHash += searchCharacters[i];
1186 matchHash += matchString[i];
1187 }
1188
1189 unsigned i = 0;
1190 // keep looping until we match
1191 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
1192 if (i == delta)
1193 return kNotFound;
1194 searchHash += searchCharacters[i + matchLength];
1195 searchHash -= searchCharacters[i];
1196 ++i;
1197 }
1198 return index + i;
1199 }
1200
1201 template<typename CharType>
1202 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchL ength)
1203 {
1204 // delta is the number of additional times to test; delta == 0 means test on ly once.
1205 unsigned delta = searchLength - matchLength;
1206
1207 unsigned i = 0;
1208 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
1209 if (i == delta)
1210 return kNotFound;
1211 ++i;
1212 }
1213 return index + i;
1214 }
1215
1216 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
1217 {
1218 // Check for null or empty string to match against
1219 if (!matchString)
1220 return kNotFound;
1221 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString) );
1222 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1223 unsigned matchLength = matchStringLength;
1224 if (!matchLength)
1225 return min(index, length());
1226
1227 // Check index & matchLength are in range.
1228 if (index > length())
1229 return kNotFound;
1230 unsigned searchLength = length() - index;
1231 if (matchLength > searchLength)
1232 return kNotFound;
1233
1234 if (is8Bit())
1235 return findIgnoringCaseInternal(characters8() + index, matchString, inde x, searchLength, matchLength);
1236 return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength);
1237 }
1238
1239 template <typename SearchCharacterType, typename MatchCharacterType>
1240 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharac ters, const MatchCharacterType* matchCharacters, unsigned index, unsigned search Length, unsigned matchLength)
1241 {
1242 // Optimization: keep a running hash of the strings,
1243 // only call equal() if the hashes match.
1244
1245 // delta is the number of additional times to test; delta == 0 means test on ly once.
1246 unsigned delta = searchLength - matchLength;
1247
1248 unsigned searchHash = 0;
1249 unsigned matchHash = 0;
1250
1251 for (unsigned i = 0; i < matchLength; ++i) {
1252 searchHash += searchCharacters[i];
1253 matchHash += matchCharacters[i];
1254 }
1255
1256 unsigned i = 0;
1257 // keep looping until we match
1258 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacte rs, matchLength)) {
1259 if (i == delta)
1260 return kNotFound;
1261 searchHash += searchCharacters[i + matchLength];
1262 searchHash -= searchCharacters[i];
1263 ++i;
1264 }
1265 return index + i;
1266 }
1267
1268 size_t StringImpl::find(StringImpl* matchString)
1269 {
1270 // Check for null string to match against
1271 if (UNLIKELY(!matchString))
1272 return kNotFound;
1273 unsigned matchLength = matchString->length();
1274
1275 // Optimization 1: fast case for strings of length 1.
1276 if (matchLength == 1) {
1277 if (is8Bit()) {
1278 if (matchString->is8Bit())
1279 return WTF::find(characters8(), length(), matchString->character s8()[0]);
1280 return WTF::find(characters8(), length(), matchString->characters16( )[0]);
1281 }
1282 if (matchString->is8Bit())
1283 return WTF::find(characters16(), length(), matchString->characters8( )[0]);
1284 return WTF::find(characters16(), length(), matchString->characters16()[0 ]);
1285 }
1286
1287 // Check matchLength is in range.
1288 if (matchLength > length())
1289 return kNotFound;
1290
1291 // Check for empty string to match against
1292 if (UNLIKELY(!matchLength))
1293 return 0;
1294
1295 if (is8Bit()) {
1296 if (matchString->is8Bit())
1297 return findInternal(characters8(), matchString->characters8(), 0, le ngth(), matchLength);
1298 return findInternal(characters8(), matchString->characters16(), 0, lengt h(), matchLength);
1299 }
1300
1301 if (matchString->is8Bit())
1302 return findInternal(characters16(), matchString->characters8(), 0, lengt h(), matchLength);
1303
1304 return findInternal(characters16(), matchString->characters16(), 0, length() , matchLength);
1305 }
1306
1307 size_t StringImpl::find(StringImpl* matchString, unsigned index)
1308 {
1309 // Check for null or empty string to match against
1310 if (UNLIKELY(!matchString))
1311 return kNotFound;
1312
1313 unsigned matchLength = matchString->length();
1314
1315 // Optimization 1: fast case for strings of length 1.
1316 if (matchLength == 1) {
1317 if (is8Bit())
1318 return WTF::find(characters8(), length(), (*matchString)[0], index);
1319 return WTF::find(characters16(), length(), (*matchString)[0], index);
1320 }
1321
1322 if (UNLIKELY(!matchLength))
1323 return min(index, length());
1324
1325 // Check index & matchLength are in range.
1326 if (index > length())
1327 return kNotFound;
1328 unsigned searchLength = length() - index;
1329 if (matchLength > searchLength)
1330 return kNotFound;
1331
1332 if (is8Bit()) {
1333 if (matchString->is8Bit())
1334 return findInternal(characters8() + index, matchString->characters8( ), index, searchLength, matchLength);
1335 return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1336 }
1337
1338 if (matchString->is8Bit())
1339 return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1340
1341 return findInternal(characters16() + index, matchString->characters16(), ind ex, searchLength, matchLength);
1342 }
1343
1344 template <typename SearchCharacterType, typename MatchCharacterType>
1345 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* sea rchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsign ed searchLength, unsigned matchLength)
1346 {
1347 // delta is the number of additional times to test; delta == 0 means test on ly once.
1348 unsigned delta = searchLength - matchLength;
1349
1350 unsigned i = 0;
1351 // keep looping until we match
1352 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength )) {
1353 if (i == delta)
1354 return kNotFound;
1355 ++i;
1356 }
1357 return index + i;
1358 }
1359
1360 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
1361 {
1362 // Check for null or empty string to match against
1363 if (!matchString)
1364 return kNotFound;
1365 unsigned matchLength = matchString->length();
1366 if (!matchLength)
1367 return min(index, length());
1368
1369 // Check index & matchLength are in range.
1370 if (index > length())
1371 return kNotFound;
1372 unsigned searchLength = length() - index;
1373 if (matchLength > searchLength)
1374 return kNotFound;
1375
1376 if (is8Bit()) {
1377 if (matchString->is8Bit())
1378 return findIgnoringCaseInner(characters8() + index, matchString->cha racters8(), index, searchLength, matchLength);
1379 return findIgnoringCaseInner(characters8() + index, matchString->charact ers16(), index, searchLength, matchLength);
1380 }
1381
1382 if (matchString->is8Bit())
1383 return findIgnoringCaseInner(characters16() + index, matchString->charac ters8(), index, searchLength, matchLength);
1384
1385 return findIgnoringCaseInner(characters16() + index, matchString->characters 16(), index, searchLength, matchLength);
1386 }
1387
1388 template <typename SearchCharacterType, typename MatchCharacterType>
1389 ALWAYS_INLINE static size_t findIgnoringASCIICaseInner(const SearchCharacterType * searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, u nsigned searchLength, unsigned matchLength)
1390 {
1391 // delta is the number of additional times to test; delta == 0 means test on ly once.
1392 unsigned delta = searchLength - matchLength;
1393
1394 unsigned i = 0;
1395 // keep looping until we match
1396 while (!equalIgnoringASCIICase(searchCharacters + i, matchCharacters, matchL ength)) {
1397 if (i == delta)
1398 return kNotFound;
1399 ++i;
1400 }
1401 return index + i;
1402 }
1403
1404 size_t StringImpl::findIgnoringASCIICase(StringImpl* matchString, unsigned index )
1405 {
1406 // Check for null or empty string to match against
1407 if (!matchString)
1408 return kNotFound;
1409 unsigned matchLength = matchString->length();
1410 if (!matchLength)
1411 return min(index, length());
1412
1413 // Check index & matchLength are in range.
1414 if (index > length())
1415 return kNotFound;
1416 unsigned searchLength = length() - index;
1417 if (matchLength > searchLength)
1418 return kNotFound;
1419
1420 if (is8Bit()) {
1421 const LChar* searchStart = characters8() + index;
1422 if (matchString->is8Bit())
1423 return findIgnoringASCIICaseInner(searchStart, matchString->characte rs8(), index, searchLength, matchLength);
1424 return findIgnoringASCIICaseInner(searchStart, matchString->characters16 (), index, searchLength, matchLength);
1425 }
1426
1427 const UChar* searchStart = characters16() + index;
1428 if (matchString->is8Bit())
1429 return findIgnoringASCIICaseInner(searchStart, matchString->characters8( ), index, searchLength, matchLength);
1430 return findIgnoringASCIICaseInner(searchStart, matchString->characters16(), index, searchLength, matchLength);
1431 }
1432
1433 size_t StringImpl::findNextLineStart(unsigned index)
1434 {
1435 if (is8Bit())
1436 return WTF::findNextLineStart(characters8(), m_length, index);
1437 return WTF::findNextLineStart(characters16(), m_length, index);
1438 }
1439
1440 size_t StringImpl::count(LChar c) const
1441 {
1442 int count = 0;
1443 if (is8Bit()) {
1444 for (size_t i = 0; i < m_length; ++i)
1445 count += characters8()[i] == c;
1446 } else {
1447 for (size_t i = 0; i < m_length; ++i)
1448 count += characters16()[i] == c;
1449 }
1450 return count;
1451 }
1452
1453 size_t StringImpl::reverseFind(UChar c, unsigned index)
1454 {
1455 if (is8Bit())
1456 return WTF::reverseFind(characters8(), m_length, c, index);
1457 return WTF::reverseFind(characters16(), m_length, c, index);
1458 }
1459
1460 template <typename SearchCharacterType, typename MatchCharacterType>
1461 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCh aracters, const MatchCharacterType* matchCharacters, unsigned index, unsigned le ngth, unsigned matchLength)
1462 {
1463 // Optimization: keep a running hash of the strings,
1464 // only call equal if the hashes match.
1465
1466 // delta is the number of additional times to test; delta == 0 means test on ly once.
1467 unsigned delta = min(index, length - matchLength);
1468
1469 unsigned searchHash = 0;
1470 unsigned matchHash = 0;
1471 for (unsigned i = 0; i < matchLength; ++i) {
1472 searchHash += searchCharacters[delta + i];
1473 matchHash += matchCharacters[i];
1474 }
1475
1476 // keep looping until we match
1477 while (searchHash != matchHash || !equal(searchCharacters + delta, matchChar acters, matchLength)) {
1478 if (!delta)
1479 return kNotFound;
1480 --delta;
1481 searchHash -= searchCharacters[delta + matchLength];
1482 searchHash += searchCharacters[delta];
1483 }
1484 return delta;
1485 }
1486
1487 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
1488 {
1489 // Check for null or empty string to match against
1490 if (!matchString)
1491 return kNotFound;
1492 unsigned matchLength = matchString->length();
1493 unsigned ourLength = length();
1494 if (!matchLength)
1495 return min(index, ourLength);
1496
1497 // Optimization 1: fast case for strings of length 1.
1498 if (matchLength == 1) {
1499 if (is8Bit())
1500 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
1501 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], in dex);
1502 }
1503
1504 // Check index & matchLength are in range.
1505 if (matchLength > ourLength)
1506 return kNotFound;
1507
1508 if (is8Bit()) {
1509 if (matchString->is8Bit())
1510 return reverseFindInner(characters8(), matchString->characters8(), i ndex, ourLength, matchLength);
1511 return reverseFindInner(characters8(), matchString->characters16(), inde x, ourLength, matchLength);
1512 }
1513
1514 if (matchString->is8Bit())
1515 return reverseFindInner(characters16(), matchString->characters8(), inde x, ourLength, matchLength);
1516
1517 return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1518 }
1519
1520 ALWAYS_INLINE static bool equalSubstring(const StringImpl* stringImpl, unsigned startOffset, const LChar* matchString, unsigned matchLength)
1521 {
1522 ASSERT(stringImpl);
1523 ASSERT(matchLength <= stringImpl->length());
1524 ASSERT(startOffset + matchLength <= stringImpl->length());
1525
1526 if (stringImpl->is8Bit())
1527 return equal(stringImpl->characters8() + startOffset, matchString, match Length);
1528 return equal(stringImpl->characters16() + startOffset, matchString, matchLen gth);
1529 }
1530
1531 bool StringImpl::startsWith(UChar character) const
1532 {
1533 return m_length && (*this)[0] == character;
1534 }
1535
1536 bool StringImpl::startsWith(const char* prefixString, unsigned prefixLength) con st
1537 {
1538 ASSERT(prefixLength);
1539 if (prefixLength > length())
1540 return false;
1541 return equalSubstring(this, 0, reinterpret_cast<const LChar*>(prefixString), prefixLength);
1542 }
1543
1544 ALWAYS_INLINE static bool equalSubstring(const StringImpl* stringImpl, unsigned startOffset, const StringImpl* matchString)
1545 {
1546 ASSERT(stringImpl);
1547 ASSERT(matchString);
1548 ASSERT(matchString->length() <= stringImpl->length());
1549 ASSERT(startOffset + matchString->length() <= stringImpl->length());
1550
1551 unsigned matchLength = matchString->length();
1552 if (matchString->is8Bit())
1553 return equalSubstring(stringImpl, startOffset, matchString->characters8( ), matchLength);
1554 if (stringImpl->is8Bit())
1555 return equal(stringImpl->characters8() + startOffset, matchString->chara cters16(), matchLength);
1556 return equal(stringImpl->characters16() + startOffset, matchString->characte rs16(), matchLength);
1557 }
1558
1559 bool StringImpl::startsWith(const StringImpl* prefix) const
1560 {
1561 ASSERT(prefix);
1562 if (prefix->length() > length())
1563 return false;
1564 return equalSubstring(this, 0, prefix);
1565 }
1566
1567 ALWAYS_INLINE static bool equalSubstringIgnoringCase(const StringImpl* stringImp l, unsigned startOffset, const LChar* matchString, unsigned matchLength)
1568 {
1569 ASSERT(stringImpl);
1570 ASSERT(matchLength <= stringImpl->length());
1571 ASSERT(startOffset + matchLength <= stringImpl->length());
1572
1573 if (stringImpl->is8Bit())
1574 return equalIgnoringCase(stringImpl->characters8() + startOffset, matchS tring, matchLength);
1575 return equalIgnoringCase(stringImpl->characters16() + startOffset, matchStri ng, matchLength);
1576 }
1577
1578 bool StringImpl::startsWithIgnoringCase(const char* prefixString, unsigned prefi xLength) const
1579 {
1580 ASSERT(prefixLength);
1581 if (prefixLength > length())
1582 return false;
1583 return equalSubstringIgnoringCase(this, 0, reinterpret_cast<const LChar*>(pr efixString), prefixLength);
1584 }
1585
1586 ALWAYS_INLINE static bool equalSubstringIgnoringCase(const StringImpl* stringImp l, unsigned startOffset, const StringImpl* matchString)
1587 {
1588 ASSERT(stringImpl);
1589 ASSERT(matchString);
1590 ASSERT(matchString->length() <= stringImpl->length());
1591 ASSERT(startOffset + matchString->length() <= stringImpl->length());
1592
1593 unsigned matchLength = matchString->length();
1594 if (matchString->is8Bit())
1595 return equalSubstringIgnoringCase(stringImpl, startOffset, matchString-> characters8(), matchLength);
1596 if (stringImpl->is8Bit())
1597 return equalIgnoringCase(stringImpl->characters8() + startOffset, matchS tring->characters16(), matchLength);
1598 return equalIgnoringCase(stringImpl->characters16() + startOffset, matchStri ng->characters16(), matchLength);
1599 }
1600
1601 bool StringImpl::startsWithIgnoringCase(const StringImpl* prefix) const
1602 {
1603 ASSERT(prefix);
1604 if (prefix->length() > length())
1605 return false;
1606 return equalSubstringIgnoringCase(this, 0, prefix);
1607 }
1608
1609 ALWAYS_INLINE static bool equalSubstringIgnoringASCIICase(const StringImpl* stri ngImpl, unsigned startOffset, const LChar* matchString, unsigned matchLength)
1610 {
1611 ASSERT(stringImpl);
1612 ASSERT(matchLength <= stringImpl->length());
1613 ASSERT(startOffset + matchLength <= stringImpl->length());
1614
1615 if (stringImpl->is8Bit())
1616 return equalIgnoringASCIICase(stringImpl->characters8() + startOffset, m atchString, matchLength);
1617 return equalIgnoringASCIICase(stringImpl->characters16() + startOffset, matc hString, matchLength);
1618 }
1619
1620 bool StringImpl::startsWithIgnoringASCIICase(const char* prefixString, unsigned prefixLength) const
1621 {
1622 ASSERT(prefixLength);
1623 if (prefixLength > length())
1624 return false;
1625 return equalSubstringIgnoringASCIICase(this, 0, reinterpret_cast<const LChar *>(prefixString), prefixLength);
1626 }
1627
1628 ALWAYS_INLINE static bool equalSubstringIgnoringASCIICase(const StringImpl* stri ngImpl, unsigned startOffset, const StringImpl* matchString)
1629 {
1630 ASSERT(stringImpl);
1631 ASSERT(matchString);
1632 ASSERT(matchString->length() <= stringImpl->length());
1633 ASSERT(startOffset + matchString->length() <= stringImpl->length());
1634
1635 unsigned matchLength = matchString->length();
1636 if (matchString->is8Bit())
1637 return equalSubstringIgnoringASCIICase(stringImpl, startOffset, matchStr ing->characters8(), matchLength);
1638 if (stringImpl->is8Bit())
1639 return equalIgnoringASCIICase(stringImpl->characters8() + startOffset, m atchString->characters16(), matchLength);
1640 return equalIgnoringASCIICase(stringImpl->characters16() + startOffset, matc hString->characters16(), matchLength);
1641 }
1642
1643 bool StringImpl::startsWithIgnoringASCIICase(const StringImpl* prefix) const
1644 {
1645 ASSERT(prefix);
1646 if (prefix->length() > length())
1647 return false;
1648 return equalSubstringIgnoringASCIICase(this, 0, prefix);
1649 }
1650
1651 bool StringImpl::endsWith(UChar character) const
1652 {
1653 return m_length && (*this)[m_length - 1] == character;
1654 }
1655
1656 bool StringImpl::endsWith(const char* suffixString, unsigned suffixLength) const
1657 {
1658 ASSERT(suffixLength);
1659 if (suffixLength > length())
1660 return false;
1661 return equalSubstring(this, length() - suffixLength, reinterpret_cast<const LChar*>(suffixString), suffixLength);
1662 }
1663
1664 bool StringImpl::endsWith(const StringImpl* suffix) const
1665 {
1666 ASSERT(suffix);
1667 unsigned suffixLength = suffix->length();
1668 if (suffixLength > length())
1669 return false;
1670 return equalSubstring(this, length() - suffixLength, suffix);
1671 }
1672
1673 bool StringImpl::endsWithIgnoringCase(const char* suffixString, unsigned suffixL ength) const
1674 {
1675 ASSERT(suffixLength);
1676 if (suffixLength > length())
1677 return false;
1678 return equalSubstringIgnoringCase(this, length() - suffixLength, reinterpret _cast<const LChar*>(suffixString), suffixLength);
1679 }
1680
1681 bool StringImpl::endsWithIgnoringCase(const StringImpl* suffix) const
1682 {
1683 ASSERT(suffix);
1684 unsigned suffixLength = suffix->length();
1685 if (suffixLength > length())
1686 return false;
1687 return equalSubstringIgnoringCase(this, length() - suffixLength, suffix);
1688 }
1689
1690 bool StringImpl::endsWithIgnoringASCIICase(const char* suffixString, unsigned su ffixLength) const
1691 {
1692 ASSERT(suffixLength);
1693 if (suffixLength > length())
1694 return false;
1695 return equalSubstringIgnoringASCIICase(this, length() - suffixLength, reinte rpret_cast<const LChar*>(suffixString), suffixLength);
1696 }
1697
1698 bool StringImpl::endsWithIgnoringASCIICase(const StringImpl* suffix) const
1699 {
1700 ASSERT(suffix);
1701 unsigned suffixLength = suffix->length();
1702 if (suffixLength > length())
1703 return false;
1704 return equalSubstringIgnoringASCIICase(this, length() - suffixLength, suffix );
1705 }
1706
1707 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
1708 {
1709 if (oldC == newC)
1710 return this;
1711
1712 if (find(oldC) == kNotFound)
1713 return this;
1714
1715 unsigned i;
1716 if (is8Bit()) {
1717 if (newC <= 0xff) {
1718 LChar* data;
1719 LChar oldChar = static_cast<LChar>(oldC);
1720 LChar newChar = static_cast<LChar>(newC);
1721
1722 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1723
1724 for (i = 0; i != m_length; ++i) {
1725 LChar ch = characters8()[i];
1726 if (ch == oldChar)
1727 ch = newChar;
1728 data[i] = ch;
1729 }
1730 return newImpl.release();
1731 }
1732
1733 // There is the possibility we need to up convert from 8 to 16 bit,
1734 // create a 16 bit string for the result.
1735 UChar* data;
1736 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1737
1738 for (i = 0; i != m_length; ++i) {
1739 UChar ch = characters8()[i];
1740 if (ch == oldC)
1741 ch = newC;
1742 data[i] = ch;
1743 }
1744
1745 return newImpl.release();
1746 }
1747
1748 UChar* data;
1749 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1750
1751 for (i = 0; i != m_length; ++i) {
1752 UChar ch = characters16()[i];
1753 if (ch == oldC)
1754 ch = newC;
1755 data[i] = ch;
1756 }
1757 return newImpl.release();
1758 }
1759
1760 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToR eplace, StringImpl* str)
1761 {
1762 position = min(position, length());
1763 lengthToReplace = min(lengthToReplace, length() - position);
1764 unsigned lengthToInsert = str ? str->length() : 0;
1765 if (!lengthToReplace && !lengthToInsert)
1766 return this;
1767
1768 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max () - lengthToInsert));
1769
1770 if (is8Bit() && (!str || str->is8Bit())) {
1771 LChar* data;
1772 RefPtr<StringImpl> newImpl =
1773 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1774 memcpy(data, characters8(), position * sizeof(LChar));
1775 if (str)
1776 memcpy(data + position, str->characters8(), lengthToInsert * sizeof( LChar));
1777 memcpy(data + position + lengthToInsert, characters8() + position + leng thToReplace,
1778 (length() - position - lengthToReplace) * sizeof(LChar));
1779 return newImpl.release();
1780 }
1781 UChar* data;
1782 RefPtr<StringImpl> newImpl = 1886 RefPtr<StringImpl> newImpl =
1783 createUninitialized(length() - lengthToReplace + lengthToInsert, data); 1887 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1784 if (is8Bit()) 1888 memcpy(data, characters8(), position * sizeof(LChar));
1785 for (unsigned i = 0; i < position; ++i) 1889 if (str)
1786 data[i] = characters8()[i]; 1890 memcpy(data + position, str->characters8(),
1891 lengthToInsert * sizeof(LChar));
1892 memcpy(data + position + lengthToInsert,
1893 characters8() + position + lengthToReplace,
1894 (length() - position - lengthToReplace) * sizeof(LChar));
1895 return newImpl.release();
1896 }
1897 UChar* data;
1898 RefPtr<StringImpl> newImpl =
1899 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1900 if (is8Bit())
1901 for (unsigned i = 0; i < position; ++i)
1902 data[i] = characters8()[i];
1903 else
1904 memcpy(data, characters16(), position * sizeof(UChar));
1905 if (str) {
1906 if (str->is8Bit())
1907 for (unsigned i = 0; i < lengthToInsert; ++i)
1908 data[i + position] = str->characters8()[i];
1787 else 1909 else
1788 memcpy(data, characters16(), position * sizeof(UChar)); 1910 memcpy(data + position, str->characters16(),
1789 if (str) { 1911 lengthToInsert * sizeof(UChar));
1790 if (str->is8Bit()) 1912 }
1791 for (unsigned i = 0; i < lengthToInsert; ++i) 1913 if (is8Bit()) {
1792 data[i + position] = str->characters8()[i]; 1914 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
1793 else 1915 data[i + position + lengthToInsert] =
1794 memcpy(data + position, str->characters16(), lengthToInsert * sizeof (UChar)); 1916 characters8()[i + position + lengthToReplace];
1795 } 1917 } else {
1796 if (is8Bit()) { 1918 memcpy(data + position + lengthToInsert,
1797 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i) 1919 characters16() + position + lengthToReplace,
1798 data[i + position + lengthToInsert] = characters8()[i + position + l engthToReplace]; 1920 (length() - position - lengthToReplace) * sizeof(UChar));
1799 } else { 1921 }
1800 memcpy(data + position + lengthToInsert, characters16() + position + len gthToReplace, 1922 return newImpl.release();
1801 (length() - position - lengthToReplace) * sizeof(UChar)); 1923 }
1802 } 1924
1925 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern,
1926 StringImpl* replacement) {
1927 if (!replacement)
1928 return this;
1929
1930 if (replacement->is8Bit())
1931 return replace(pattern, replacement->characters8(), replacement->length());
1932
1933 return replace(pattern, replacement->characters16(), replacement->length());
1934 }
1935
1936 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern,
1937 const LChar* replacement,
1938 unsigned repStrLength) {
1939 ASSERT(replacement);
1940
1941 size_t srcSegmentStart = 0;
1942 unsigned matchCount = 0;
1943
1944 // Count the matches.
1945 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1946 ++matchCount;
1947 ++srcSegmentStart;
1948 }
1949
1950 // If we have 0 matches then we don't have to do any more work.
1951 if (!matchCount)
1952 return this;
1953
1954 RELEASE_ASSERT(!repStrLength ||
1955 matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1956
1957 unsigned replaceSize = matchCount * repStrLength;
1958 unsigned newSize = m_length - matchCount;
1959 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1960
1961 newSize += replaceSize;
1962
1963 // Construct the new data.
1964 size_t srcSegmentEnd;
1965 unsigned srcSegmentLength;
1966 srcSegmentStart = 0;
1967 unsigned dstOffset = 0;
1968
1969 if (is8Bit()) {
1970 LChar* data;
1971 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1972
1973 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1974 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1975 memcpy(data + dstOffset, characters8() + srcSegmentStart,
1976 srcSegmentLength * sizeof(LChar));
1977 dstOffset += srcSegmentLength;
1978 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
1979 dstOffset += repStrLength;
1980 srcSegmentStart = srcSegmentEnd + 1;
1981 }
1982
1983 srcSegmentLength = m_length - srcSegmentStart;
1984 memcpy(data + dstOffset, characters8() + srcSegmentStart,
1985 srcSegmentLength * sizeof(LChar));
1986
1987 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1988
1803 return newImpl.release(); 1989 return newImpl.release();
1804 } 1990 }
1805 1991
1806 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen t) 1992 UChar* data;
1807 { 1993 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1808 if (!replacement) 1994
1809 return this; 1995 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1810 1996 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1811 if (replacement->is8Bit()) 1997 memcpy(data + dstOffset, characters16() + srcSegmentStart,
1812 return replace(pattern, replacement->characters8(), replacement->length( )); 1998 srcSegmentLength * sizeof(UChar));
1813 1999
1814 return replace(pattern, replacement->characters16(), replacement->length()); 2000 dstOffset += srcSegmentLength;
1815 } 2001 for (unsigned i = 0; i < repStrLength; ++i)
1816 2002 data[i + dstOffset] = replacement[i];
1817 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replaceme nt, unsigned repStrLength) 2003
1818 { 2004 dstOffset += repStrLength;
1819 ASSERT(replacement); 2005 srcSegmentStart = srcSegmentEnd + 1;
1820 2006 }
1821 size_t srcSegmentStart = 0; 2007
1822 unsigned matchCount = 0; 2008 srcSegmentLength = m_length - srcSegmentStart;
1823 2009 memcpy(data + dstOffset, characters16() + srcSegmentStart,
1824 // Count the matches. 2010 srcSegmentLength * sizeof(UChar));
1825 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { 2011
1826 ++matchCount; 2012 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1827 ++srcSegmentStart; 2013
1828 } 2014 return newImpl.release();
1829 2015 }
1830 // If we have 0 matches then we don't have to do any more work. 2016
1831 if (!matchCount) 2017 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern,
1832 return this; 2018 const UChar* replacement,
1833 2019 unsigned repStrLength) {
1834 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max( ) / repStrLength); 2020 ASSERT(replacement);
1835 2021
1836 unsigned replaceSize = matchCount * repStrLength; 2022 size_t srcSegmentStart = 0;
1837 unsigned newSize = m_length - matchCount; 2023 unsigned matchCount = 0;
1838 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); 2024
1839 2025 // Count the matches.
1840 newSize += replaceSize; 2026 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1841 2027 ++matchCount;
1842 // Construct the new data. 2028 ++srcSegmentStart;
1843 size_t srcSegmentEnd; 2029 }
1844 unsigned srcSegmentLength; 2030
1845 srcSegmentStart = 0; 2031 // If we have 0 matches then we don't have to do any more work.
1846 unsigned dstOffset = 0; 2032 if (!matchCount)
1847 2033 return this;
1848 if (is8Bit()) { 2034
1849 LChar* data; 2035 RELEASE_ASSERT(!repStrLength ||
1850 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 2036 matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1851 2037
1852 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 2038 unsigned replaceSize = matchCount * repStrLength;
1853 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 2039 unsigned newSize = m_length - matchCount;
1854 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegment Length * sizeof(LChar)); 2040 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1855 dstOffset += srcSegmentLength; 2041
1856 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar)); 2042 newSize += replaceSize;
1857 dstOffset += repStrLength; 2043
1858 srcSegmentStart = srcSegmentEnd + 1; 2044 // Construct the new data.
1859 } 2045 size_t srcSegmentEnd;
1860 2046 unsigned srcSegmentLength;
1861 srcSegmentLength = m_length - srcSegmentStart; 2047 srcSegmentStart = 0;
1862 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLeng th * sizeof(LChar)); 2048 unsigned dstOffset = 0;
1863 2049
1864 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 2050 if (is8Bit()) {
1865
1866 return newImpl.release();
1867 }
1868
1869 UChar* data; 2051 UChar* data;
1870 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 2052 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1871 2053
1872 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 2054 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1873 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 2055 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1874 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen gth * sizeof(UChar)); 2056 for (unsigned i = 0; i < srcSegmentLength; ++i)
1875 2057 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1876 dstOffset += srcSegmentLength; 2058
1877 for (unsigned i = 0; i < repStrLength; ++i) 2059 dstOffset += srcSegmentLength;
1878 data[i + dstOffset] = replacement[i]; 2060 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1879 2061
1880 dstOffset += repStrLength; 2062 dstOffset += repStrLength;
1881 srcSegmentStart = srcSegmentEnd + 1; 2063 srcSegmentStart = srcSegmentEnd + 1;
1882 } 2064 }
1883 2065
1884 srcSegmentLength = m_length - srcSegmentStart; 2066 srcSegmentLength = m_length - srcSegmentStart;
1885 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 2067 for (unsigned i = 0; i < srcSegmentLength; ++i)
2068 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1886 2069
1887 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 2070 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1888 2071
1889 return newImpl.release(); 2072 return newImpl.release();
1890 } 2073 }
1891 2074
1892 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replaceme nt, unsigned repStrLength) 2075 UChar* data;
1893 { 2076 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1894 ASSERT(replacement); 2077
1895 2078 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1896 size_t srcSegmentStart = 0; 2079 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1897 unsigned matchCount = 0; 2080 memcpy(data + dstOffset, characters16() + srcSegmentStart,
1898 2081 srcSegmentLength * sizeof(UChar));
1899 // Count the matches. 2082
1900 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { 2083 dstOffset += srcSegmentLength;
1901 ++matchCount; 2084 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1902 ++srcSegmentStart; 2085
1903 } 2086 dstOffset += repStrLength;
1904 2087 srcSegmentStart = srcSegmentEnd + 1;
1905 // If we have 0 matches then we don't have to do any more work. 2088 }
1906 if (!matchCount) 2089
1907 return this; 2090 srcSegmentLength = m_length - srcSegmentStart;
1908 2091 memcpy(data + dstOffset, characters16() + srcSegmentStart,
1909 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max( ) / repStrLength); 2092 srcSegmentLength * sizeof(UChar));
1910 2093
1911 unsigned replaceSize = matchCount * repStrLength; 2094 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1912 unsigned newSize = m_length - matchCount; 2095
1913 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); 2096 return newImpl.release();
1914 2097 }
1915 newSize += replaceSize; 2098
1916 2099 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern,
1917 // Construct the new data. 2100 StringImpl* replacement) {
1918 size_t srcSegmentEnd; 2101 if (!pattern || !replacement)
1919 unsigned srcSegmentLength; 2102 return this;
1920 srcSegmentStart = 0; 2103
1921 unsigned dstOffset = 0; 2104 unsigned patternLength = pattern->length();
1922 2105 if (!patternLength)
1923 if (is8Bit()) { 2106 return this;
1924 UChar* data; 2107
1925 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 2108 unsigned repStrLength = replacement->length();
1926 2109 size_t srcSegmentStart = 0;
1927 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 2110 unsigned matchCount = 0;
1928 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 2111
1929 for (unsigned i = 0; i < srcSegmentLength; ++i) 2112 // Count the matches.
1930 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 2113 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1931 2114 ++matchCount;
1932 dstOffset += srcSegmentLength; 2115 srcSegmentStart += patternLength;
1933 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); 2116 }
1934 2117
1935 dstOffset += repStrLength; 2118 // If we have 0 matches, we don't have to do any more work
1936 srcSegmentStart = srcSegmentEnd + 1; 2119 if (!matchCount)
1937 } 2120 return this;
1938 2121
1939 srcSegmentLength = m_length - srcSegmentStart; 2122 unsigned newSize = m_length - matchCount * patternLength;
1940 for (unsigned i = 0; i < srcSegmentLength; ++i) 2123 RELEASE_ASSERT(!repStrLength ||
1941 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 2124 matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1942 2125
1943 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 2126 RELEASE_ASSERT(newSize <=
1944 2127 (numeric_limits<unsigned>::max() - matchCount * repStrLength));
1945 return newImpl.release(); 2128
1946 } 2129 newSize += matchCount * repStrLength;
1947 2130
1948 UChar* data; 2131 // Construct the new data
1949 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 2132 size_t srcSegmentEnd;
1950 2133 unsigned srcSegmentLength;
1951 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 2134 srcSegmentStart = 0;
1952 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 2135 unsigned dstOffset = 0;
1953 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen gth * sizeof(UChar)); 2136 bool srcIs8Bit = is8Bit();
1954 2137 bool replacementIs8Bit = replacement->is8Bit();
1955 dstOffset += srcSegmentLength; 2138
1956 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); 2139 // There are 4 cases:
1957 2140 // 1. This and replacement are both 8 bit.
1958 dstOffset += repStrLength; 2141 // 2. This and replacement are both 16 bit.
1959 srcSegmentStart = srcSegmentEnd + 1; 2142 // 3. This is 8 bit and replacement is 16 bit.
1960 } 2143 // 4. This is 16 bit and replacement is 8 bit.
1961 2144 if (srcIs8Bit && replacementIs8Bit) {
1962 srcSegmentLength = m_length - srcSegmentStart; 2145 // Case 1
1963 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 2146 LChar* data;
1964
1965 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1966
1967 return newImpl.release();
1968 }
1969
1970 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl acement)
1971 {
1972 if (!pattern || !replacement)
1973 return this;
1974
1975 unsigned patternLength = pattern->length();
1976 if (!patternLength)
1977 return this;
1978
1979 unsigned repStrLength = replacement->length();
1980 size_t srcSegmentStart = 0;
1981 unsigned matchCount = 0;
1982
1983 // Count the matches.
1984 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1985 ++matchCount;
1986 srcSegmentStart += patternLength;
1987 }
1988
1989 // If we have 0 matches, we don't have to do any more work
1990 if (!matchCount)
1991 return this;
1992
1993 unsigned newSize = m_length - matchCount * patternLength;
1994 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max( ) / repStrLength);
1995
1996 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * re pStrLength));
1997
1998 newSize += matchCount * repStrLength;
1999
2000
2001 // Construct the new data
2002 size_t srcSegmentEnd;
2003 unsigned srcSegmentLength;
2004 srcSegmentStart = 0;
2005 unsigned dstOffset = 0;
2006 bool srcIs8Bit = is8Bit();
2007 bool replacementIs8Bit = replacement->is8Bit();
2008
2009 // There are 4 cases:
2010 // 1. This and replacement are both 8 bit.
2011 // 2. This and replacement are both 16 bit.
2012 // 3. This is 8 bit and replacement is 16 bit.
2013 // 4. This is 16 bit and replacement is 8 bit.
2014 if (srcIs8Bit && replacementIs8Bit) {
2015 // Case 1
2016 LChar* data;
2017 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
2018 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
2019 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
2020 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegment Length * sizeof(LChar));
2021 dstOffset += srcSegmentLength;
2022 memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar));
2023 dstOffset += repStrLength;
2024 srcSegmentStart = srcSegmentEnd + patternLength;
2025 }
2026
2027 srcSegmentLength = m_length - srcSegmentStart;
2028 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLeng th * sizeof(LChar));
2029
2030 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
2031
2032 return newImpl.release();
2033 }
2034
2035 UChar* data;
2036 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 2147 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
2037 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 2148 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
2038 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 2149 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
2039 if (srcIs8Bit) { 2150 memcpy(data + dstOffset, characters8() + srcSegmentStart,
2040 // Case 3. 2151 srcSegmentLength * sizeof(LChar));
2041 for (unsigned i = 0; i < srcSegmentLength; ++i) 2152 dstOffset += srcSegmentLength;
2042 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 2153 memcpy(data + dstOffset, replacement->characters8(),
2043 } else { 2154 repStrLength * sizeof(LChar));
2044 // Case 2 & 4. 2155 dstOffset += repStrLength;
2045 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmen tLength * sizeof(UChar)); 2156 srcSegmentStart = srcSegmentEnd + patternLength;
2046 }
2047 dstOffset += srcSegmentLength;
2048 if (replacementIs8Bit) {
2049 // Cases 2 & 3.
2050 for (unsigned i = 0; i < repStrLength; ++i)
2051 data[i + dstOffset] = replacement->characters8()[i];
2052 } else {
2053 // Case 4
2054 memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar));
2055 }
2056 dstOffset += repStrLength;
2057 srcSegmentStart = srcSegmentEnd + patternLength;
2058 } 2157 }
2059 2158
2060 srcSegmentLength = m_length - srcSegmentStart; 2159 srcSegmentLength = m_length - srcSegmentStart;
2160 memcpy(data + dstOffset, characters8() + srcSegmentStart,
2161 srcSegmentLength * sizeof(LChar));
2162
2163 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
2164
2165 return newImpl.release();
2166 }
2167
2168 UChar* data;
2169 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
2170 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
2171 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
2061 if (srcIs8Bit) { 2172 if (srcIs8Bit) {
2062 // Case 3. 2173 // Case 3.
2063 for (unsigned i = 0; i < srcSegmentLength; ++i) 2174 for (unsigned i = 0; i < srcSegmentLength; ++i)
2064 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 2175 data[i + dstOffset] = characters8()[i + srcSegmentStart];
2065 } else { 2176 } else {
2066 // Cases 2 & 4. 2177 // Case 2 & 4.
2067 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen gth * sizeof(UChar)); 2178 memcpy(data + dstOffset, characters16() + srcSegmentStart,
2068 } 2179 srcSegmentLength * sizeof(UChar));
2069 2180 }
2070 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 2181 dstOffset += srcSegmentLength;
2071 2182 if (replacementIs8Bit) {
2072 return newImpl.release(); 2183 // Cases 2 & 3.
2073 } 2184 for (unsigned i = 0; i < repStrLength; ++i)
2074 2185 data[i + dstOffset] = replacement->characters8()[i];
2075 PassRefPtr<StringImpl> StringImpl::upconvertedString() 2186 } else {
2076 { 2187 // Case 4
2077 if (is8Bit()) 2188 memcpy(data + dstOffset, replacement->characters16(),
2078 return String::make16BitFrom8BitSource(characters8(), m_length).releaseI mpl(); 2189 repStrLength * sizeof(UChar));
2079 return this; 2190 }
2080 } 2191 dstOffset += repStrLength;
2081 2192 srcSegmentStart = srcSegmentEnd + patternLength;
2082 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b) 2193 }
2083 { 2194
2084 unsigned aLength = a->length(); 2195 srcSegmentLength = m_length - srcSegmentStart;
2085 unsigned bLength = b->length(); 2196 if (srcIs8Bit) {
2086 if (aLength != bLength) 2197 // Case 3.
2198 for (unsigned i = 0; i < srcSegmentLength; ++i)
2199 data[i + dstOffset] = characters8()[i + srcSegmentStart];
2200 } else {
2201 // Cases 2 & 4.
2202 memcpy(data + dstOffset, characters16() + srcSegmentStart,
2203 srcSegmentLength * sizeof(UChar));
2204 }
2205
2206 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
2207
2208 return newImpl.release();
2209 }
2210
2211 PassRefPtr<StringImpl> StringImpl::upconvertedString() {
2212 if (is8Bit())
2213 return String::make16BitFrom8BitSource(characters8(), m_length)
2214 .releaseImpl();
2215 return this;
2216 }
2217
2218 static inline bool stringImplContentEqual(const StringImpl* a,
2219 const StringImpl* b) {
2220 unsigned aLength = a->length();
2221 unsigned bLength = b->length();
2222 if (aLength != bLength)
2223 return false;
2224
2225 if (a->is8Bit()) {
2226 if (b->is8Bit())
2227 return equal(a->characters8(), b->characters8(), aLength);
2228
2229 return equal(a->characters8(), b->characters16(), aLength);
2230 }
2231
2232 if (b->is8Bit())
2233 return equal(a->characters16(), b->characters8(), aLength);
2234
2235 return equal(a->characters16(), b->characters16(), aLength);
2236 }
2237
2238 bool equal(const StringImpl* a, const StringImpl* b) {
2239 if (a == b)
2240 return true;
2241 if (!a || !b)
2242 return false;
2243 if (a->isAtomic() && b->isAtomic())
2244 return false;
2245
2246 return stringImplContentEqual(a, b);
2247 }
2248
2249 template <typename CharType>
2250 inline bool equalInternal(const StringImpl* a,
2251 const CharType* b,
2252 unsigned length) {
2253 if (!a)
2254 return !b;
2255 if (!b)
2256 return false;
2257
2258 if (a->length() != length)
2259 return false;
2260 if (a->is8Bit())
2261 return equal(a->characters8(), b, length);
2262 return equal(a->characters16(), b, length);
2263 }
2264
2265 bool equal(const StringImpl* a, const LChar* b, unsigned length) {
2266 return equalInternal(a, b, length);
2267 }
2268
2269 bool equal(const StringImpl* a, const UChar* b, unsigned length) {
2270 return equalInternal(a, b, length);
2271 }
2272
2273 bool equal(const StringImpl* a, const LChar* b) {
2274 if (!a)
2275 return !b;
2276 if (!b)
2277 return !a;
2278
2279 unsigned length = a->length();
2280
2281 if (a->is8Bit()) {
2282 const LChar* aPtr = a->characters8();
2283 for (unsigned i = 0; i != length; ++i) {
2284 LChar bc = b[i];
2285 LChar ac = aPtr[i];
2286 if (!bc)
2087 return false; 2287 return false;
2088 2288 if (ac != bc)
2089 if (a->is8Bit()) {
2090 if (b->is8Bit())
2091 return equal(a->characters8(), b->characters8(), aLength);
2092
2093 return equal(a->characters8(), b->characters16(), aLength);
2094 }
2095
2096 if (b->is8Bit())
2097 return equal(a->characters16(), b->characters8(), aLength);
2098
2099 return equal(a->characters16(), b->characters16(), aLength);
2100 }
2101
2102 bool equal(const StringImpl* a, const StringImpl* b)
2103 {
2104 if (a == b)
2105 return true;
2106 if (!a || !b)
2107 return false; 2289 return false;
2108 if (a->isAtomic() && b->isAtomic()) 2290 }
2291
2292 return !b[length];
2293 }
2294
2295 const UChar* aPtr = a->characters16();
2296 for (unsigned i = 0; i != length; ++i) {
2297 LChar bc = b[i];
2298 if (!bc)
2299 return false;
2300 if (aPtr[i] != bc)
2301 return false;
2302 }
2303
2304 return !b[length];
2305 }
2306
2307 bool equalNonNull(const StringImpl* a, const StringImpl* b) {
2308 ASSERT(a && b);
2309 if (a == b)
2310 return true;
2311
2312 return stringImplContentEqual(a, b);
2313 }
2314
2315 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b) {
2316 if (a == b)
2317 return true;
2318 if (!a || !b)
2319 return false;
2320
2321 return CaseFoldingHash::equal(a, b);
2322 }
2323
2324 bool equalIgnoringCase(const StringImpl* a, const LChar* b) {
2325 if (!a)
2326 return !b;
2327 if (!b)
2328 return !a;
2329
2330 unsigned length = a->length();
2331
2332 // Do a faster loop for the case where all the characters are ASCII.
2333 UChar ored = 0;
2334 bool equal = true;
2335 if (a->is8Bit()) {
2336 const LChar* as = a->characters8();
2337 for (unsigned i = 0; i != length; ++i) {
2338 LChar bc = b[i];
2339 if (!bc)
2109 return false; 2340 return false;
2110 2341 UChar ac = as[i];
2111 return stringImplContentEqual(a, b); 2342 ored |= ac;
2112 } 2343 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2113
2114 template <typename CharType>
2115 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned lengt h)
2116 {
2117 if (!a)
2118 return !b;
2119 if (!b)
2120 return false;
2121
2122 if (a->length() != length)
2123 return false;
2124 if (a->is8Bit())
2125 return equal(a->characters8(), b, length);
2126 return equal(a->characters16(), b, length);
2127 }
2128
2129 bool equal(const StringImpl* a, const LChar* b, unsigned length)
2130 {
2131 return equalInternal(a, b, length);
2132 }
2133
2134 bool equal(const StringImpl* a, const UChar* b, unsigned length)
2135 {
2136 return equalInternal(a, b, length);
2137 }
2138
2139 bool equal(const StringImpl* a, const LChar* b)
2140 {
2141 if (!a)
2142 return !b;
2143 if (!b)
2144 return !a;
2145
2146 unsigned length = a->length();
2147
2148 if (a->is8Bit()) {
2149 const LChar* aPtr = a->characters8();
2150 for (unsigned i = 0; i != length; ++i) {
2151 LChar bc = b[i];
2152 LChar ac = aPtr[i];
2153 if (!bc)
2154 return false;
2155 if (ac != bc)
2156 return false;
2157 }
2158
2159 return !b[length];
2160 }
2161
2162 const UChar* aPtr = a->characters16();
2163 for (unsigned i = 0; i != length; ++i) {
2164 LChar bc = b[i];
2165 if (!bc)
2166 return false;
2167 if (aPtr[i] != bc)
2168 return false;
2169 }
2170
2171 return !b[length];
2172 }
2173
2174 bool equalNonNull(const StringImpl* a, const StringImpl* b)
2175 {
2176 ASSERT(a && b);
2177 if (a == b)
2178 return true;
2179
2180 return stringImplContentEqual(a, b);
2181 }
2182
2183 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
2184 {
2185 if (a == b)
2186 return true;
2187 if (!a || !b)
2188 return false;
2189
2190 return CaseFoldingHash::equal(a, b);
2191 }
2192
2193 bool equalIgnoringCase(const StringImpl* a, const LChar* b)
2194 {
2195 if (!a)
2196 return !b;
2197 if (!b)
2198 return !a;
2199
2200 unsigned length = a->length();
2201
2202 // Do a faster loop for the case where all the characters are ASCII.
2203 UChar ored = 0;
2204 bool equal = true;
2205 if (a->is8Bit()) {
2206 const LChar* as = a->characters8();
2207 for (unsigned i = 0; i != length; ++i) {
2208 LChar bc = b[i];
2209 if (!bc)
2210 return false;
2211 UChar ac = as[i];
2212 ored |= ac;
2213 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2214 }
2215
2216 // Do a slower implementation for cases that include non-ASCII character s.
2217 if (ored & ~0x7F) {
2218 equal = true;
2219 for (unsigned i = 0; i != length; ++i)
2220 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2221 }
2222
2223 return equal && !b[length];
2224 }
2225
2226 const UChar* as = a->characters16();
2227 for (unsigned i = 0; i != length; ++i) {
2228 LChar bc = b[i];
2229 if (!bc)
2230 return false;
2231 UChar ac = as[i];
2232 ored |= ac;
2233 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2234 } 2344 }
2235 2345
2236 // Do a slower implementation for cases that include non-ASCII characters. 2346 // Do a slower implementation for cases that include non-ASCII characters.
2237 if (ored & ~0x7F) { 2347 if (ored & ~0x7F) {
2238 equal = true; 2348 equal = true;
2239 for (unsigned i = 0; i != length; ++i) { 2349 for (unsigned i = 0; i != length; ++i)
2240 equal = equal && (foldCase(as[i]) == foldCase(b[i])); 2350 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2241 }
2242 } 2351 }
2243 2352
2244 return equal && !b[length]; 2353 return equal && !b[length];
2245 } 2354 }
2246 2355
2247 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) 2356 const UChar* as = a->characters16();
2248 { 2357 for (unsigned i = 0; i != length; ++i) {
2249 ASSERT(a && b); 2358 LChar bc = b[i];
2250 if (a == b) 2359 if (!bc)
2251 return true; 2360 return false;
2252 2361 UChar ac = as[i];
2253 unsigned length = a->length(); 2362 ored |= ac;
2254 if (length != b->length()) 2363 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2255 return false; 2364 }
2256 2365
2257 if (a->is8Bit()) { 2366 // Do a slower implementation for cases that include non-ASCII characters.
2258 if (b->is8Bit()) 2367 if (ored & ~0x7F) {
2259 return equalIgnoringCase(a->characters8(), b->characters8(), length) ; 2368 equal = true;
2260 2369 for (unsigned i = 0; i != length; ++i) {
2261 return equalIgnoringCase(b->characters16(), a->characters8(), length); 2370 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2262 } 2371 }
2263 2372 }
2373
2374 return equal && !b[length];
2375 }
2376
2377 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) {
2378 ASSERT(a && b);
2379 if (a == b)
2380 return true;
2381
2382 unsigned length = a->length();
2383 if (length != b->length())
2384 return false;
2385
2386 if (a->is8Bit()) {
2264 if (b->is8Bit()) 2387 if (b->is8Bit())
2265 return equalIgnoringCase(a->characters16(), b->characters8(), length); 2388 return equalIgnoringCase(a->characters8(), b->characters8(), length);
2266 2389
2267 return equalIgnoringCase(a->characters16(), b->characters16(), length); 2390 return equalIgnoringCase(b->characters16(), a->characters8(), length);
2268 } 2391 }
2269 2392
2270 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) 2393 if (b->is8Bit())
2271 { 2394 return equalIgnoringCase(a->characters16(), b->characters8(), length);
2272 if (!a && b && !b->length()) 2395
2273 return true; 2396 return equalIgnoringCase(a->characters16(), b->characters16(), length);
2274 if (!b && a && !a->length()) 2397 }
2275 return true; 2398
2276 return equal(a, b); 2399 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) {
2277 } 2400 if (!a && b && !b->length())
2278 2401 return true;
2279 size_t StringImpl::sizeInBytes() const 2402 if (!b && a && !a->length())
2280 { 2403 return true;
2281 size_t size = length(); 2404 return equal(a, b);
2282 if (!is8Bit()) 2405 }
2283 size *= 2; 2406
2284 return size + sizeof(*this); 2407 size_t StringImpl::sizeInBytes() const {
2285 } 2408 size_t size = length();
2286 2409 if (!is8Bit())
2287 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) 2410 size *= 2;
2288 { 2411 return size + sizeof(*this);
2289 if (!localeIdentifier.isNull()) { 2412 }
2290 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(l ocaleIdentifier, "az")) { 2413
2291 if (c == 'i') 2414 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) {
2292 return latinCapitalLetterIWithDotAbove; 2415 if (!localeIdentifier.isNull()) {
2293 if (c == latinSmallLetterDotlessI) 2416 if (localeIdMatchesLang(localeIdentifier, "tr") ||
2294 return 'I'; 2417 localeIdMatchesLang(localeIdentifier, "az")) {
2295 } else if (localeIdMatchesLang(localeIdentifier, "lt")) { 2418 if (c == 'i')
2296 // TODO(rob.buis) implement upper-casing rules for lt 2419 return latinCapitalLetterIWithDotAbove;
2297 // like in StringImpl::upper(locale). 2420 if (c == latinSmallLetterDotlessI)
2298 } 2421 return 'I';
2299 } 2422 } else if (localeIdMatchesLang(localeIdentifier, "lt")) {
2300 2423 // TODO(rob.buis) implement upper-casing rules for lt
2301 return toUpper(c); 2424 // like in StringImpl::upper(locale).
2302 } 2425 }
2303 2426 }
2304 } // namespace WTF 2427
2428 return toUpper(c);
2429 }
2430
2431 } // namespace WTF
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/text/StringImpl.h ('k') | third_party/WebKit/Source/wtf/text/StringImplCF.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698