Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(131)

Side by Side Diff: third_party/WebKit/Source/wtf/text/StringImpl.cpp

Issue 1436153002: Apply clang-format with Chromium-style without column limit. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org) 3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) 4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved. 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) 6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7 * 7 *
8 * This library is free software; you can redistribute it and/or 8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public 9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either 10 * License as published by the Free Software Foundation; either
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
52 using namespace std; 52 using namespace std;
53 53
54 namespace WTF { 54 namespace WTF {
55 55
56 using namespace Unicode; 56 using namespace Unicode;
57 57
58 static_assert(sizeof(StringImpl) == 3 * sizeof(int), "StringImpl should stay sma ll"); 58 static_assert(sizeof(StringImpl) == 3 * sizeof(int), "StringImpl should stay sma ll");
59 59
60 #ifdef STRING_STATS 60 #ifdef STRING_STATS
61 61
62 static Mutex& statsMutex() 62 static Mutex& statsMutex() {
63 { 63 DEFINE_STATIC_LOCAL(Mutex, mutex, ());
64 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); 64 return mutex;
65 return mutex; 65 }
66 } 66
67 67 static HashSet<void*>& liveStrings() {
68 static HashSet<void*>& liveStrings() 68 // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings.
69 { 69 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
70 // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings. 70 return strings;
71 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ()); 71 }
72 return strings; 72
73 } 73 void addStringForStats(StringImpl* string) {
74 74 MutexLocker locker(statsMutex());
75 void addStringForStats(StringImpl* string) 75 liveStrings().add(string);
76 { 76 }
77 MutexLocker locker(statsMutex()); 77
78 liveStrings().add(string); 78 void removeStringForStats(StringImpl* string) {
79 } 79 MutexLocker locker(statsMutex());
80 80 liveStrings().remove(string);
81 void removeStringForStats(StringImpl* string) 81 }
82 { 82
83 MutexLocker locker(statsMutex()); 83 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) {
84 liveStrings().remove(string); 84 const unsigned kMaxSnippetLength = 64;
85 } 85 snippet.clear();
86 86
87 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) 87 size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
88 { 88 if (expectedLength == kMaxSnippetLength)
89 const unsigned kMaxSnippetLength = 64; 89 expectedLength += 3; // For the "...".
90 snippet.clear(); 90 ++expectedLength; // For the terminating '\0'.
91 91 snippet.reserveCapacity(expectedLength);
92 size_t expectedLength = std::min(string->length(), kMaxSnippetLength); 92
93 if (expectedLength == kMaxSnippetLength) 93 size_t i;
94 expectedLength += 3; // For the "...". 94 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
95 ++expectedLength; // For the terminating '\0'. 95 UChar c = (*string)[i];
96 snippet.reserveCapacity(expectedLength); 96 if (isASCIIPrintable(c))
97 97 snippet.append(c);
98 size_t i; 98 else
99 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) { 99 snippet.append('?');
100 UChar c = (*string)[i]; 100 }
101 if (isASCIIPrintable(c)) 101 if (i < string->length()) {
102 snippet.append(c); 102 snippet.append('.');
103 else 103 snippet.append('.');
104 snippet.append('?'); 104 snippet.append('.');
105 } 105 }
106 if (i < string->length()) { 106 snippet.append('\0');
107 snippet.append('.'); 107 }
108 snippet.append('.'); 108
109 snippet.append('.'); 109 static bool isUnnecessarilyWide(const StringImpl* string) {
110 } 110 if (string->is8Bit())
111 snippet.append('\0'); 111 return false;
112 } 112 UChar c = 0;
113 113 for (unsigned i = 0; i < string->length(); ++i)
114 static bool isUnnecessarilyWide(const StringImpl* string) 114 c |= (*string)[i] >> 8;
115 { 115 return !c;
116 if (string->is8Bit())
117 return false;
118 UChar c = 0;
119 for (unsigned i = 0; i < string->length(); ++i)
120 c |= (*string)[i] >> 8;
121 return !c;
122 } 116 }
123 117
124 class PerStringStats : public RefCounted<PerStringStats> { 118 class PerStringStats : public RefCounted<PerStringStats> {
125 public: 119 public:
126 static PassRefPtr<PerStringStats> create() 120 static PassRefPtr<PerStringStats> create() {
127 { 121 return adoptRef(new PerStringStats);
128 return adoptRef(new PerStringStats); 122 }
129 } 123
130 124 void add(const StringImpl* string) {
131 void add(const StringImpl* string) 125 ++m_numberOfCopies;
132 { 126 if (!m_length) {
133 ++m_numberOfCopies; 127 m_length = string->length();
134 if (!m_length) { 128 fillWithSnippet(string, m_snippet);
135 m_length = string->length(); 129 }
136 fillWithSnippet(string, m_snippet); 130 if (string->isAtomic())
137 } 131 ++m_numberOfAtomicCopies;
138 if (string->isAtomic()) 132 if (isUnnecessarilyWide(string))
139 ++m_numberOfAtomicCopies; 133 m_unnecessarilyWide = true;
140 if (isUnnecessarilyWide(string)) 134 }
141 m_unnecessarilyWide = true; 135
142 } 136 size_t totalCharacters() const {
143 137 return m_numberOfCopies * m_length;
144 size_t totalCharacters() const 138 }
145 { 139
146 return m_numberOfCopies * m_length; 140 void print() {
147 } 141 const char* status = "ok";
148 142 if (m_unnecessarilyWide)
149 void print() 143 status = "16";
150 { 144 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_l ength, m_snippet.data());
151 const char* status = "ok"; 145 }
152 if (m_unnecessarilyWide) 146
153 status = "16"; 147 bool m_unnecessarilyWide;
154 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data()); 148 unsigned m_numberOfCopies;
155 } 149 unsigned m_length;
156 150 unsigned m_numberOfAtomicCopies;
157 bool m_unnecessarilyWide; 151 Vector<char> m_snippet;
158 unsigned m_numberOfCopies; 152
159 unsigned m_length; 153 private:
160 unsigned m_numberOfAtomicCopies; 154 PerStringStats()
161 Vector<char> m_snippet; 155 : m_unnecessarilyWide(false), m_numberOfCopies(0), m_length(0), m_numberOf AtomicCopies(0) {
162 156 }
163 private:
164 PerStringStats()
165 : m_unnecessarilyWide(false)
166 , m_numberOfCopies(0)
167 , m_length(0)
168 , m_numberOfAtomicCopies(0)
169 {
170 }
171 }; 157 };
172 158
173 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b) 159 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b) {
174 { 160 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
175 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide) 161 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
176 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide; 162 if (a->totalCharacters() != b->totalCharacters())
177 if (a->totalCharacters() != b->totalCharacters()) 163 return a->totalCharacters() < b->totalCharacters();
178 return a->totalCharacters() < b->totalCharacters(); 164 if (a->m_numberOfCopies != b->m_numberOfCopies)
179 if (a->m_numberOfCopies != b->m_numberOfCopies) 165 return a->m_numberOfCopies < b->m_numberOfCopies;
180 return a->m_numberOfCopies < b->m_numberOfCopies; 166 if (a->m_length != b->m_length)
181 if (a->m_length != b->m_length) 167 return a->m_length < b->m_length;
182 return a->m_length < b->m_length; 168 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
183 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies; 169 }
184 } 170
185 171 static void printLiveStringStats(void*) {
186 static void printLiveStringStats(void*) 172 MutexLocker locker(statsMutex());
187 { 173 HashSet<void*>& strings = liveStrings();
188 MutexLocker locker(statsMutex()); 174
189 HashSet<void*>& strings = liveStrings(); 175 HashMap<StringImpl*, RefPtr<PerStringStats>> stats;
190 176 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); + +iter) {
191 HashMap<StringImpl*, RefPtr<PerStringStats>> stats; 177 StringImpl* string = static_cast<StringImpl*>(*iter);
192 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) { 178 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry = stats.find(st ring);
193 StringImpl* string = static_cast<StringImpl*>(*iter); 179 RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats> (PerStringStats::create()) : entry->value;
194 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry = stats.fin d(string); 180 value->add(string);
195 RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringSt ats>(PerStringStats::create()) : entry->value; 181 stats.set(string, value.release());
196 value->add(string); 182 }
197 stats.set(string, value.release()); 183
198 } 184 Vector<RefPtr<PerStringStats>> all;
199 185 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter = stats.begin (); iter != stats.end(); ++iter)
200 Vector<RefPtr<PerStringStats>> all; 186 all.append(iter->value);
201 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter = stats.beg in(); iter != stats.end(); ++iter) 187
202 all.append(iter->value); 188 std::sort(all.begin(), all.end());
203 189 std::reverse(all.begin(), all.end());
204 std::sort(all.begin(), all.end()); 190 for (size_t i = 0; i < 20 && i < all.size(); ++i)
205 std::reverse(all.begin(), all.end()); 191 all[i]->print();
206 for (size_t i = 0; i < 20 && i < all.size(); ++i)
207 all[i]->print();
208 } 192 }
209 193
210 StringStats StringImpl::m_stringStats; 194 StringStats StringImpl::m_stringStats;
211 195
212 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printString StatsFrequency; 196 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printString StatsFrequency;
213 197
214 void StringStats::removeString(StringImpl* string) 198 void StringStats::removeString(StringImpl* string) {
215 { 199 unsigned length = string->length();
216 unsigned length = string->length(); 200 --m_totalNumberStrings;
217 --m_totalNumberStrings; 201
218 202 if (string->is8Bit()) {
219 if (string->is8Bit()) { 203 --m_number8BitStrings;
220 --m_number8BitStrings; 204 m_total8BitData -= length;
221 m_total8BitData -= length; 205 } else {
222 } else { 206 --m_number16BitStrings;
223 --m_number16BitStrings; 207 m_total16BitData -= length;
224 m_total16BitData -= length; 208 }
225 } 209
226 210 if (!--s_stringRemovesTillPrintStats) {
227 if (!--s_stringRemovesTillPrintStats) { 211 s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
228 s_stringRemovesTillPrintStats = s_printStringStatsFrequency; 212 printStats();
229 printStats(); 213 }
230 } 214 }
231 } 215
232 216 void StringStats::printStats() {
233 void StringStats::printStats() 217 dataLogF("String stats for process id %d:\n", getpid());
234 { 218
235 dataLogF("String stats for process id %d:\n", getpid()); 219 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
236 220 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100 ) / (double)m_totalNumberStrings : 0.0;
237 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitDat a; 221 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (do uble)m_number8BitStrings : 0.0;
238 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 1 00) / (double)m_totalNumberStrings : 0.0; 222 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6 .1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, aver age8bitLength);
239 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / ( double)m_number8BitStrings : 0.0; 223
240 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, av erage8bitLength); 224 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 1 00) / (double)m_totalNumberStrings : 0.0;
241 225 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
242 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0; 226 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6 .1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
243 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0; 227
244 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength); 228 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
245 229 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
246 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0; 230 dataLogF("%8u Total %12llu chars %12llu bytes avg length %6. 1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLengt h);
247 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2; 231 unsigned long long totalSavedBytes = m_total8BitData;
248 dataLogF("%8u Total %12llu chars %12llu bytes avg length % 6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLen gth); 232 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (d ouble)(totalDataBytes + totalSavedBytes) : 0.0;
249 unsigned long long totalSavedBytes = m_total8BitData; 233 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, p ercentSavings);
250 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0; 234
251 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings); 235 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
252 236 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
253 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl); 237 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, ove rheadPercent);
254 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 10 0; 238
255 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, o verheadPercent); 239 internal::callOnMainThread(&printLiveStringStats, nullptr);
256
257 internal::callOnMainThread(&printLiveStringStats, nullptr);
258 } 240 }
259 #endif 241 #endif
260 242
261 void* StringImpl::operator new(size_t size) 243 void* StringImpl::operator new(size_t size) {
262 { 244 ASSERT(size == sizeof(StringImpl));
263 ASSERT(size == sizeof(StringImpl)); 245 return Partitions::bufferMalloc(size);
264 return Partitions::bufferMalloc(size); 246 }
265 } 247
266 248 void StringImpl::operator delete(void* ptr) {
267 void StringImpl::operator delete(void* ptr) 249 Partitions::bufferFree(ptr);
268 { 250 }
269 Partitions::bufferFree(ptr); 251
270 } 252 inline StringImpl::~StringImpl() {
271 253 ASSERT(!isStatic());
272 inline StringImpl::~StringImpl() 254
273 { 255 STRING_STATS_REMOVE_STRING(this);
274 ASSERT(!isStatic()); 256
275 257 if (isAtomic())
276 STRING_STATS_REMOVE_STRING(this); 258 AtomicString::remove(this);
277 259 }
278 if (isAtomic()) 260
279 AtomicString::remove(this); 261 void StringImpl::destroyIfNotStatic() {
280 } 262 if (!isStatic())
281 263 delete this;
282 void StringImpl::destroyIfNotStatic() 264 }
283 { 265
284 if (!isStatic()) 266 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data) {
285 delete this; 267 if (!length) {
286 } 268 data = 0;
287 269 return empty();
288 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data) 270 }
289 { 271
290 if (!length) { 272 // Allocate a single buffer large enough to contain the StringImpl
291 data = 0; 273 // struct as well as the data which it contains. This removes one
292 return empty(); 274 // heap allocation from this call.
293 } 275 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(allocat ionSize<LChar>(length)));
294 276
295 // Allocate a single buffer large enough to contain the StringImpl 277 data = reinterpret_cast<LChar*>(string + 1);
296 // struct as well as the data which it contains. This removes one 278 return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
297 // heap allocation from this call. 279 }
298 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(alloc ationSize<LChar>(length))); 280
299 281 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) {
300 data = reinterpret_cast<LChar*>(string + 1); 282 if (!length) {
301 return adoptRef(new (string) StringImpl(length, Force8BitConstructor)); 283 data = 0;
302 } 284 return empty();
303 285 }
304 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) 286
305 { 287 // Allocate a single buffer large enough to contain the StringImpl
306 if (!length) { 288 // struct as well as the data which it contains. This removes one
307 data = 0; 289 // heap allocation from this call.
308 return empty(); 290 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(allocat ionSize<UChar>(length)));
309 } 291
310 292 data = reinterpret_cast<UChar*>(string + 1);
311 // Allocate a single buffer large enough to contain the StringImpl 293 return adoptRef(new (string) StringImpl(length));
312 // struct as well as the data which it contains. This removes one 294 }
313 // heap allocation from this call. 295
314 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(alloc ationSize<UChar>(length))); 296 static StaticStringsTable& staticStrings() {
315 297 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
316 data = reinterpret_cast<UChar*>(string + 1); 298 return staticStrings;
317 return adoptRef(new (string) StringImpl(length));
318 }
319
320 static StaticStringsTable& staticStrings()
321 {
322 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
323 return staticStrings;
324 } 299 }
325 300
326 #if ENABLE(ASSERT) 301 #if ENABLE(ASSERT)
327 static bool s_allowCreationOfStaticStrings = true; 302 static bool s_allowCreationOfStaticStrings = true;
328 #endif 303 #endif
329 304
330 const StaticStringsTable& StringImpl::allStaticStrings() 305 const StaticStringsTable& StringImpl::allStaticStrings() {
331 { 306 return staticStrings();
332 return staticStrings(); 307 }
333 } 308
334 309 void StringImpl::freezeStaticStrings() {
335 void StringImpl::freezeStaticStrings() 310 ASSERT(isMainThread());
336 {
337 ASSERT(isMainThread());
338 311
339 #if ENABLE(ASSERT) 312 #if ENABLE(ASSERT)
340 s_allowCreationOfStaticStrings = false; 313 s_allowCreationOfStaticStrings = false;
341 #endif 314 #endif
342 } 315 }
343 316
344 unsigned StringImpl::m_highestStaticStringLength = 0; 317 unsigned StringImpl::m_highestStaticStringLength = 0;
345 318
346 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsign ed hash) 319 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsign ed hash) {
347 { 320 ASSERT(s_allowCreationOfStaticStrings);
348 ASSERT(s_allowCreationOfStaticStrings); 321 ASSERT(string);
349 ASSERT(string); 322 ASSERT(length);
350 ASSERT(length); 323
351 324 StaticStringsTable::const_iterator it = staticStrings().find(hash);
352 StaticStringsTable::const_iterator it = staticStrings().find(hash); 325 if (it != staticStrings().end()) {
353 if (it != staticStrings().end()) { 326 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
354 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar))); 327 return it->value;
355 return it->value; 328 }
356 } 329
357 330 // Allocate a single buffer large enough to contain the StringImpl
358 // Allocate a single buffer large enough to contain the StringImpl 331 // struct as well as the data which it contains. This removes one
359 // struct as well as the data which it contains. This removes one 332 // heap allocation from this call.
360 // heap allocation from this call. 333 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Strin gImpl)) / sizeof(LChar)));
361 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str ingImpl)) / sizeof(LChar))); 334 size_t size = sizeof(StringImpl) + length * sizeof(LChar);
362 size_t size = sizeof(StringImpl) + length * sizeof(LChar); 335
363 336 WTF_ANNOTATE_SCOPED_MEMORY_LEAK;
364 WTF_ANNOTATE_SCOPED_MEMORY_LEAK; 337 StringImpl* impl = static_cast<StringImpl*>(Partitions::bufferMalloc(size));
365 StringImpl* impl = static_cast<StringImpl*>(Partitions::bufferMalloc(size)); 338
366 339 LChar* data = reinterpret_cast<LChar*>(impl + 1);
367 LChar* data = reinterpret_cast<LChar*>(impl + 1); 340 impl = new (impl) StringImpl(length, hash, StaticString);
368 impl = new (impl) StringImpl(length, hash, StaticString); 341 memcpy(data, string, length * sizeof(LChar));
369 memcpy(data, string, length * sizeof(LChar));
370 #if ENABLE(ASSERT) 342 #if ENABLE(ASSERT)
371 impl->assertHashIsCorrect(); 343 impl->assertHashIsCorrect();
372 #endif 344 #endif
373 345
374 ASSERT(isMainThread()); 346 ASSERT(isMainThread());
375 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length); 347 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
376 staticStrings().add(hash, impl); 348 staticStrings().add(hash, impl);
377 WTF_ANNOTATE_BENIGN_RACE(impl, 349 WTF_ANNOTATE_BENIGN_RACE(impl,
378 "Benign race on the reference counter of a static string created by Stri ngImpl::createStatic"); 350 "Benign race on the reference counter of a static str ing created by StringImpl::createStatic");
379 351
380 return impl; 352 return impl;
381 } 353 }
382 354
383 void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) 355 void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) {
384 { 356 ASSERT(s_allowCreationOfStaticStrings);
385 ASSERT(s_allowCreationOfStaticStrings); 357 staticStrings().reserveCapacityForSize(size);
386 staticStrings().reserveCapacityForSize(size); 358 }
387 } 359
388 360 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned leng th) {
389 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned leng th) 361 if (!characters || !length)
390 { 362 return empty();
391 if (!characters || !length) 363
392 return empty(); 364 UChar* data;
393 365 RefPtr<StringImpl> string = createUninitialized(length, data);
394 UChar* data; 366 memcpy(data, characters, length * sizeof(UChar));
395 RefPtr<StringImpl> string = createUninitialized(length, data); 367 return string.release();
396 memcpy(data, characters, length * sizeof(UChar)); 368 }
397 return string.release(); 369
398 } 370 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned leng th) {
399 371 if (!characters || !length)
400 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned leng th) 372 return empty();
401 { 373
402 if (!characters || !length) 374 LChar* data;
403 return empty(); 375 RefPtr<StringImpl> string = createUninitialized(length, data);
404 376 memcpy(data, characters, length * sizeof(LChar));
405 LChar* data; 377 return string.release();
406 RefPtr<StringImpl> string = createUninitialized(length, data); 378 }
407 memcpy(data, characters, length * sizeof(LChar)); 379
408 return string.release(); 380 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length) {
409 } 381 if (!characters || !length)
410 382 return empty();
411 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length) 383
412 { 384 LChar* data;
413 if (!characters || !length) 385 RefPtr<StringImpl> string = createUninitialized(length, data);
414 return empty(); 386
415 387 for (size_t i = 0; i < length; ++i) {
416 LChar* data; 388 if (characters[i] & 0xff00)
417 RefPtr<StringImpl> string = createUninitialized(length, data); 389 return create(characters, length);
418 390 data[i] = static_cast<LChar>(characters[i]);
419 for (size_t i = 0; i < length; ++i) { 391 }
420 if (characters[i] & 0xff00) 392
421 return create(characters, length); 393 return string.release();
422 data[i] = static_cast<LChar>(characters[i]); 394 }
423 } 395
424 396 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) {
425 return string.release(); 397 if (!string)
426 } 398 return empty();
427 399 size_t length = strlen(reinterpret_cast<const char*>(string));
428 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) 400 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
429 { 401 return create(string, length);
430 if (!string) 402 }
431 return empty(); 403
432 size_t length = strlen(reinterpret_cast<const char*>(string)); 404 bool StringImpl::containsOnlyWhitespace() {
433 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); 405 // FIXME: The definition of whitespace here includes a number of characters
434 return create(string, length); 406 // that are not whitespace from the point of view of LayoutText; I wonder if
435 } 407 // that's a problem in practice.
436 408 if (is8Bit()) {
437 bool StringImpl::containsOnlyWhitespace()
438 {
439 // FIXME: The definition of whitespace here includes a number of characters
440 // that are not whitespace from the point of view of LayoutText; I wonder if
441 // that's a problem in practice.
442 if (is8Bit()) {
443 for (unsigned i = 0; i < m_length; ++i) {
444 UChar c = characters8()[i];
445 if (!isASCIISpace(c))
446 return false;
447 }
448
449 return true;
450 }
451
452 for (unsigned i = 0; i < m_length; ++i) { 409 for (unsigned i = 0; i < m_length; ++i) {
453 UChar c = characters16()[i]; 410 UChar c = characters8()[i];
454 if (!isASCIISpace(c)) 411 if (!isASCIISpace(c))
455 return false; 412 return false;
456 } 413 }
414
457 return true; 415 return true;
458 } 416 }
459 417
460 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) 418 for (unsigned i = 0; i < m_length; ++i) {
461 { 419 UChar c = characters16()[i];
462 if (start >= m_length) 420 if (!isASCIISpace(c))
463 return empty(); 421 return false;
464 unsigned maxLength = m_length - start; 422 }
465 if (length >= maxLength) { 423 return true;
466 if (!start) 424 }
467 return this; 425
468 length = maxLength; 426 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) {
469 } 427 if (start >= m_length)
470 if (is8Bit()) 428 return empty();
471 return create(characters8() + start, length); 429 unsigned maxLength = m_length - start;
472 430 if (length >= maxLength) {
473 return create(characters16() + start, length); 431 if (!start)
474 } 432 return this;
475 433 length = maxLength;
476 UChar32 StringImpl::characterStartingAt(unsigned i) 434 }
477 { 435 if (is8Bit())
478 if (is8Bit()) 436 return create(characters8() + start, length);
479 return characters8()[i]; 437
480 if (U16_IS_SINGLE(characters16()[i])) 438 return create(characters16() + start, length);
481 return characters16()[i]; 439 }
482 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(chara cters16()[i + 1])) 440
483 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]); 441 UChar32 StringImpl::characterStartingAt(unsigned i) {
484 return 0; 442 if (is8Bit())
485 } 443 return characters8()[i];
486 444 if (U16_IS_SINGLE(characters16()[i]))
487 PassRefPtr<StringImpl> StringImpl::lower() 445 return characters16()[i];
488 { 446 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(charact ers16()[i + 1]))
489 // Note: This is a hot function in the Dromaeo benchmark, specifically the 447 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
490 // no-op code path up through the first 'return' statement. 448 return 0;
491 449 }
492 // First scan the string for uppercase and non-ASCII characters: 450
493 if (is8Bit()) { 451 PassRefPtr<StringImpl> StringImpl::lower() {
494 unsigned firstIndexToBeLowered = m_length; 452 // Note: This is a hot function in the Dromaeo benchmark, specifically the
495 for (unsigned i = 0; i < m_length; ++i) { 453 // no-op code path up through the first 'return' statement.
496 LChar ch = characters8()[i]; 454
497 if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) { 455 // First scan the string for uppercase and non-ASCII characters:
498 firstIndexToBeLowered = i; 456 if (is8Bit()) {
499 break; 457 unsigned firstIndexToBeLowered = m_length;
500 } 458 for (unsigned i = 0; i < m_length; ++i) {
501 } 459 LChar ch = characters8()[i];
502 460 if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) {
503 // Nothing to do if the string is all ASCII with no uppercase. 461 firstIndexToBeLowered = i;
504 if (firstIndexToBeLowered == m_length) 462 break;
505 return this; 463 }
506 464 }
507 LChar* data8; 465
508 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
509 memcpy(data8, characters8(), firstIndexToBeLowered);
510
511 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) {
512 LChar ch = characters8()[i];
513 data8[i] = UNLIKELY(ch & ~0x7F)
514 ? static_cast<LChar>(Unicode::toLower(ch)) : toASCIILower(ch);
515 }
516
517 return newImpl.release();
518 }
519
520 bool noUpper = true;
521 UChar ored = 0;
522
523 const UChar* end = characters16() + m_length;
524 for (const UChar* chp = characters16(); chp != end; ++chp) {
525 if (UNLIKELY(isASCIIUpper(*chp)))
526 noUpper = false;
527 ored |= *chp;
528 }
529 // Nothing to do if the string is all ASCII with no uppercase. 466 // Nothing to do if the string is all ASCII with no uppercase.
530 if (noUpper && !(ored & ~0x7F)) 467 if (firstIndexToBeLowered == m_length)
531 return this; 468 return this;
532 469
533 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma x())); 470 LChar* data8;
534 int32_t length = m_length; 471 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
535 472 memcpy(data8, characters8(), firstIndexToBeLowered);
536 if (!(ored & ~0x7F)) { 473
537 UChar* data16; 474 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) {
538 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 475 LChar ch = characters8()[i];
539 476 data8[i] = UNLIKELY(ch & ~0x7F)
540 for (int32_t i = 0; i < length; ++i) { 477 ? static_cast<LChar>(Unicode::toLower(ch))
541 UChar c = characters16()[i]; 478 : toASCIILower(ch);
542 data16[i] = toASCIILower(c); 479 }
543 } 480
544 return newImpl.release(); 481 return newImpl.release();
545 } 482 }
546 483
547 // Do a slower implementation for cases that include non-ASCII characters. 484 bool noUpper = true;
485 UChar ored = 0;
486
487 const UChar* end = characters16() + m_length;
488 for (const UChar* chp = characters16(); chp != end; ++chp) {
489 if (UNLIKELY(isASCIIUpper(*chp)))
490 noUpper = false;
491 ored |= *chp;
492 }
493 // Nothing to do if the string is all ASCII with no uppercase.
494 if (noUpper && !(ored & ~0x7F))
495 return this;
496
497 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max( )));
498 int32_t length = m_length;
499
500 if (!(ored & ~0x7F)) {
548 UChar* data16; 501 UChar* data16;
549 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 502 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
550 503
551 bool error; 504 for (int32_t i = 0; i < length; ++i) {
552 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_leng th, &error); 505 UChar c = characters16()[i];
553 if (!error && realLength == length) 506 data16[i] = toASCIILower(c);
554 return newImpl.release(); 507 }
555
556 newImpl = createUninitialized(realLength, data16);
557 Unicode::toLower(data16, realLength, characters16(), m_length, &error);
558 if (error)
559 return this;
560 return newImpl.release(); 508 return newImpl.release();
561 } 509 }
562 510
563 PassRefPtr<StringImpl> StringImpl::upper() 511 // Do a slower implementation for cases that include non-ASCII characters.
564 { 512 UChar* data16;
565 // This function could be optimized for no-op cases the way lower() is, 513 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
566 // but in empirical testing, few actual calls to upper() are no-ops, so 514
567 // it wouldn't be worth the extra time for pre-scanning. 515 bool error;
568 516 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length , &error);
569 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma x())); 517 if (!error && realLength == length)
570 int32_t length = m_length; 518 return newImpl.release();
571 519
572 if (is8Bit()) { 520 newImpl = createUninitialized(realLength, data16);
573 LChar* data8; 521 Unicode::toLower(data16, realLength, characters16(), m_length, &error);
574 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); 522 if (error)
575 523 return this;
576 // Do a faster loop for the case where all the characters are ASCII. 524 return newImpl.release();
577 LChar ored = 0; 525 }
578 for (int i = 0; i < length; ++i) { 526
579 LChar c = characters8()[i]; 527 PassRefPtr<StringImpl> StringImpl::upper() {
580 ored |= c; 528 // This function could be optimized for no-op cases the way lower() is,
581 data8[i] = toASCIIUpper(c); 529 // but in empirical testing, few actual calls to upper() are no-ops, so
582 } 530 // it wouldn't be worth the extra time for pre-scanning.
583 if (!(ored & ~0x7F)) 531
584 return newImpl.release(); 532 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max( )));
585 533 int32_t length = m_length;
586 // Do a slower implementation for cases that include non-ASCII Latin-1 c haracters. 534
587 int numberSharpSCharacters = 0; 535 if (is8Bit()) {
588 536 LChar* data8;
589 // There are two special cases. 537 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
590 // 1. latin-1 characters when converted to upper case are 16 bit charac ters. 538
591 // 2. Lower case sharp-S converts to "SS" (two characters) 539 // Do a faster loop for the case where all the characters are ASCII.
592 for (int32_t i = 0; i < length; ++i) { 540 LChar ored = 0;
593 LChar c = characters8()[i]; 541 for (int i = 0; i < length; ++i) {
594 if (UNLIKELY(c == smallLetterSharpSCharacter)) 542 LChar c = characters8()[i];
595 ++numberSharpSCharacters; 543 ored |= c;
596 UChar upper = static_cast<UChar>(Unicode::toUpper(c)); 544 data8[i] = toASCIIUpper(c);
597 if (UNLIKELY(upper > 0xff)) { 545 }
598 // Since this upper-cased character does not fit in an 8-bit str ing, we need to take the 16-bit path. 546 if (!(ored & ~0x7F))
599 goto upconvert; 547 return newImpl.release();
600 } 548
601 data8[i] = static_cast<LChar>(upper); 549 // Do a slower implementation for cases that include non-ASCII Latin-1 chara cters.
602 } 550 int numberSharpSCharacters = 0;
603 551
604 if (!numberSharpSCharacters) 552 // There are two special cases.
605 return newImpl.release(); 553 // 1. latin-1 characters when converted to upper case are 16 bit characters .
606 554 // 2. Lower case sharp-S converts to "SS" (two characters)
607 // We have numberSSCharacters sharp-s characters, but none of the other special characters. 555 for (int32_t i = 0; i < length; ++i) {
608 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); 556 LChar c = characters8()[i];
609 557 if (UNLIKELY(c == smallLetterSharpSCharacter))
610 LChar* dest = data8; 558 ++numberSharpSCharacters;
611 559 UChar upper = static_cast<UChar>(Unicode::toUpper(c));
612 for (int32_t i = 0; i < length; ++i) { 560 if (UNLIKELY(upper > 0xff)) {
613 LChar c = characters8()[i]; 561 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
614 if (c == smallLetterSharpSCharacter) { 562 goto upconvert;
615 *dest++ = 'S'; 563 }
616 *dest++ = 'S'; 564 data8[i] = static_cast<LChar>(upper);
617 } else { 565 }
618 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); 566
619 } 567 if (!numberSharpSCharacters)
620 } 568 return newImpl.release();
621 569
622 return newImpl.release(); 570 // We have numberSSCharacters sharp-s characters, but none of the other spec ial characters.
623 } 571 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
572
573 LChar* dest = data8;
574
575 for (int32_t i = 0; i < length; ++i) {
576 LChar c = characters8()[i];
577 if (c == smallLetterSharpSCharacter) {
578 *dest++ = 'S';
579 *dest++ = 'S';
580 } else {
581 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
582 }
583 }
584
585 return newImpl.release();
586 }
624 587
625 upconvert: 588 upconvert:
626 RefPtr<StringImpl> upconverted = upconvertedString(); 589 RefPtr<StringImpl> upconverted = upconvertedString();
627 const UChar* source16 = upconverted->characters16(); 590 const UChar* source16 = upconverted->characters16();
628 591
629 UChar* data16; 592 UChar* data16;
630 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 593 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
631 594
632 // Do a faster loop for the case where all the characters are ASCII. 595 // Do a faster loop for the case where all the characters are ASCII.
633 UChar ored = 0; 596 UChar ored = 0;
634 for (int i = 0; i < length; ++i) { 597 for (int i = 0; i < length; ++i) {
635 UChar c = source16[i]; 598 UChar c = source16[i];
636 ored |= c; 599 ored |= c;
637 data16[i] = toASCIIUpper(c); 600 data16[i] = toASCIIUpper(c);
638 } 601 }
639 if (!(ored & ~0x7F)) 602 if (!(ored & ~0x7F))
640 return newImpl.release();
641
642 // Do a slower implementation for cases that include non-ASCII characters.
643 bool error;
644 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &e rror);
645 if (!error && realLength == length)
646 return newImpl;
647 newImpl = createUninitialized(realLength, data16);
648 Unicode::toUpper(data16, realLength, source16, m_length, &error);
649 if (error)
650 return this;
651 return newImpl.release(); 603 return newImpl.release();
652 } 604
653 605 // Do a slower implementation for cases that include non-ASCII characters.
654 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang) 606 bool error;
655 { 607 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &err or);
656 if (equalIgnoringCase(localeId, lang)) 608 if (!error && realLength == length)
657 return true; 609 return newImpl;
658 static char localeIdPrefix[4]; 610 newImpl = createUninitialized(realLength, data16);
659 static const char delimeter[4] = "-_@"; 611 Unicode::toUpper(data16, realLength, source16, m_length, &error);
660 612 if (error)
661 size_t langLength = strlen(lang); 613 return this;
662 RELEASE_ASSERT(langLength >= 2 && langLength <= 3); 614 return newImpl.release();
663 strncpy(localeIdPrefix, lang, langLength); 615 }
664 for (int i = 0; i < 3; ++i) { 616
665 localeIdPrefix[langLength] = delimeter[i]; 617 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang) {
666 // case-insensitive comparison 618 if (equalIgnoringCase(localeId, lang))
667 if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langL ength + 1, TextCaseInsensitive)) 619 return true;
668 return true; 620 static char localeIdPrefix[4];
669 } 621 static const char delimeter[4] = "-_@";
670 return false; 622
623 size_t langLength = strlen(lang);
624 RELEASE_ASSERT(langLength >= 2 && langLength <= 3);
625 strncpy(localeIdPrefix, lang, langLength);
626 for (int i = 0; i < 3; ++i) {
627 localeIdPrefix[langLength] = delimeter[i];
628 // case-insensitive comparison
629 if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langLengt h + 1, TextCaseInsensitive))
630 return true;
631 }
632 return false;
671 } 633 }
672 634
673 typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, cons t char*, UErrorCode*); 635 typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, cons t char*, UErrorCode*);
674 636
675 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length, icuCaseConverter converter, const char* locale, StringImpl* originalString) 637 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length, icuCaseConverter converter, const char* locale, StringImpl* originalString) {
676 { 638 UChar* data16;
677 UChar* data16; 639 size_t targetLength = length;
678 size_t targetLength = length; 640 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);
679 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16); 641 do {
680 do {
681 UErrorCode status = U_ZERO_ERROR;
682 targetLength = converter(data16, targetLength, source16, length, locale, &status);
683 if (U_SUCCESS(status)) {
684 if (length > 0)
685 return output->substring(0, targetLength);
686 return output.release();
687 }
688 if (status != U_BUFFER_OVERFLOW_ERROR)
689 return originalString;
690 // Expand the buffer.
691 output = StringImpl::createUninitialized(targetLength, data16);
692 } while (true);
693 }
694
695 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
696 {
697 // Use the more-optimized code path most of the time.
698 // Only Turkic (tr and az) languages and Lithuanian requires
699 // locale-specific lowercasing rules. Even though CLDR has el-Lower,
700 // it's identical to the locale-agnostic lowercasing. Context-dependent
701 // handling of Greek capital sigma is built into the common lowercasing
702 // function in ICU.
703 const char* localeForConversion = 0;
704 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(local eIdentifier, "az"))
705 localeForConversion = "tr";
706 else if (localeIdMatchesLang(localeIdentifier, "lt"))
707 localeForConversion = "lt";
708 else
709 return lower();
710
711 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
712 CRASH();
713 int length = m_length;
714
715 RefPtr<StringImpl> upconverted = upconvertedString();
716 const UChar* source16 = upconverted->characters16();
717 return caseConvert(source16, length, u_strToLower, localeForConversion, this );
718 }
719
720 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
721 {
722 // Use the more-optimized code path most of the time.
723 // Only Turkic (tr and az) languages and Greek require locale-specific
724 // lowercasing rules.
725 icu::UnicodeString transliteratorId;
726 const char* localeForConversion = 0;
727 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(local eIdentifier, "az"))
728 localeForConversion = "tr";
729 else if (localeIdMatchesLang(localeIdentifier, "el"))
730 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper");
731 else if (localeIdMatchesLang(localeIdentifier, "lt"))
732 localeForConversion = "lt";
733 else
734 return upper();
735
736 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
737 CRASH();
738 int length = m_length;
739
740 RefPtr<StringImpl> upconverted = upconvertedString();
741 const UChar* source16 = upconverted->characters16();
742
743 if (localeForConversion)
744 return caseConvert(source16, length, u_strToUpper, localeForConversion, this);
745
746 // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek .
747 UErrorCode status = U_ZERO_ERROR; 642 UErrorCode status = U_ZERO_ERROR;
748 OwnPtr<icu::Transliterator> translit = 643 targetLength = converter(data16, targetLength, source16, length, locale, &st atus);
749 adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FO RWARD, status)); 644 if (U_SUCCESS(status)) {
750 if (U_FAILURE(status)) 645 if (length > 0)
751 return upper(); 646 return output->substring(0, targetLength);
752 647 return output.release();
753 // target will be copy-on-write. 648 }
754 icu::UnicodeString target(false, source16, length); 649 if (status != U_BUFFER_OVERFLOW_ERROR)
755 translit->transliterate(target); 650 return originalString;
756 651 // Expand the buffer.
757 return create(target.getBuffer(), target.length()); 652 output = StringImpl::createUninitialized(targetLength, data16);
758 } 653 } while (true);
759 654 }
760 PassRefPtr<StringImpl> StringImpl::fill(UChar character) 655
761 { 656 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) {
762 if (!(character & ~0x7F)) { 657 // Use the more-optimized code path most of the time.
763 LChar* data; 658 // Only Turkic (tr and az) languages and Lithuanian requires
764 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 659 // locale-specific lowercasing rules. Even though CLDR has el-Lower,
765 for (unsigned i = 0; i < m_length; ++i) 660 // it's identical to the locale-agnostic lowercasing. Context-dependent
766 data[i] = static_cast<LChar>(character); 661 // handling of Greek capital sigma is built into the common lowercasing
767 return newImpl.release(); 662 // function in ICU.
768 } 663 const char* localeForConversion = 0;
769 UChar* data; 664 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeI dentifier, "az"))
665 localeForConversion = "tr";
666 else if (localeIdMatchesLang(localeIdentifier, "lt"))
667 localeForConversion = "lt";
668 else
669 return lower();
670
671 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
672 CRASH();
673 int length = m_length;
674
675 RefPtr<StringImpl> upconverted = upconvertedString();
676 const UChar* source16 = upconverted->characters16();
677 return caseConvert(source16, length, u_strToLower, localeForConversion, this);
678 }
679
680 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) {
681 // Use the more-optimized code path most of the time.
682 // Only Turkic (tr and az) languages and Greek require locale-specific
683 // lowercasing rules.
684 icu::UnicodeString transliteratorId;
685 const char* localeForConversion = 0;
686 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeI dentifier, "az"))
687 localeForConversion = "tr";
688 else if (localeIdMatchesLang(localeIdentifier, "el"))
689 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper");
690 else if (localeIdMatchesLang(localeIdentifier, "lt"))
691 localeForConversion = "lt";
692 else
693 return upper();
694
695 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
696 CRASH();
697 int length = m_length;
698
699 RefPtr<StringImpl> upconverted = upconvertedString();
700 const UChar* source16 = upconverted->characters16();
701
702 if (localeForConversion)
703 return caseConvert(source16, length, u_strToUpper, localeForConversion, this );
704
705 // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek.
706 UErrorCode status = U_ZERO_ERROR;
707 OwnPtr<icu::Transliterator> translit =
708 adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FORW ARD, status));
709 if (U_FAILURE(status))
710 return upper();
711
712 // target will be copy-on-write.
713 icu::UnicodeString target(false, source16, length);
714 translit->transliterate(target);
715
716 return create(target.getBuffer(), target.length());
717 }
718
719 PassRefPtr<StringImpl> StringImpl::fill(UChar character) {
720 if (!(character & ~0x7F)) {
721 LChar* data;
770 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 722 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
771 for (unsigned i = 0; i < m_length; ++i) 723 for (unsigned i = 0; i < m_length; ++i)
772 data[i] = character; 724 data[i] = static_cast<LChar>(character);
773 return newImpl.release(); 725 return newImpl.release();
774 } 726 }
775 727 UChar* data;
776 PassRefPtr<StringImpl> StringImpl::foldCase() 728 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
777 { 729 for (unsigned i = 0; i < m_length; ++i)
778 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma x())); 730 data[i] = character;
779 int32_t length = m_length; 731 return newImpl.release();
780 732 }
781 if (is8Bit()) { 733
782 // Do a faster loop for the case where all the characters are ASCII. 734 PassRefPtr<StringImpl> StringImpl::foldCase() {
783 LChar* data; 735 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max( )));
784 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data); 736 int32_t length = m_length;
785 LChar ored = 0; 737
786 738 if (is8Bit()) {
787 for (int32_t i = 0; i < length; ++i) {
788 LChar c = characters8()[i];
789 data[i] = toASCIILower(c);
790 ored |= c;
791 }
792
793 if (!(ored & ~0x7F))
794 return newImpl.release();
795
796 // Do a slower implementation for cases that include non-ASCII Latin-1 c haracters.
797 for (int32_t i = 0; i < length; ++i)
798 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
799
800 return newImpl.release();
801 }
802
803 // Do a faster loop for the case where all the characters are ASCII. 739 // Do a faster loop for the case where all the characters are ASCII.
804 UChar* data; 740 LChar* data;
805 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 741 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
806 UChar ored = 0; 742 LChar ored = 0;
743
807 for (int32_t i = 0; i < length; ++i) { 744 for (int32_t i = 0; i < length; ++i) {
808 UChar c = characters16()[i]; 745 LChar c = characters8()[i];
809 ored |= c; 746 data[i] = toASCIILower(c);
810 data[i] = toASCIILower(c); 747 ored |= c;
811 } 748 }
749
812 if (!(ored & ~0x7F)) 750 if (!(ored & ~0x7F))
813 return newImpl.release(); 751 return newImpl.release();
814 752
815 // Do a slower implementation for cases that include non-ASCII characters. 753 // Do a slower implementation for cases that include non-ASCII Latin-1 chara cters.
816 bool error; 754 for (int32_t i = 0; i < length; ++i)
817 int32_t realLength = Unicode::foldCase(data, length, characters16(), m_lengt h, &error); 755 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
818 if (!error && realLength == length) 756
819 return newImpl.release();
820 newImpl = createUninitialized(realLength, data);
821 Unicode::foldCase(data, realLength, characters16(), m_length, &error);
822 if (error)
823 return this;
824 return newImpl.release(); 757 return newImpl.release();
758 }
759
760 // Do a faster loop for the case where all the characters are ASCII.
761 UChar* data;
762 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
763 UChar ored = 0;
764 for (int32_t i = 0; i < length; ++i) {
765 UChar c = characters16()[i];
766 ored |= c;
767 data[i] = toASCIILower(c);
768 }
769 if (!(ored & ~0x7F))
770 return newImpl.release();
771
772 // Do a slower implementation for cases that include non-ASCII characters.
773 bool error;
774 int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error);
775 if (!error && realLength == length)
776 return newImpl.release();
777 newImpl = createUninitialized(realLength, data);
778 Unicode::foldCase(data, realLength, characters16(), m_length, &error);
779 if (error)
780 return this;
781 return newImpl.release();
825 } 782 }
826 783
827 template <class UCharPredicate> 784 template <class UCharPredicate>
828 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate) 785 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate) {
829 { 786 if (!m_length)
830 if (!m_length) 787 return empty();
831 return empty(); 788
832 789 unsigned start = 0;
833 unsigned start = 0; 790 unsigned end = m_length - 1;
834 unsigned end = m_length - 1; 791
835 792 // skip white space from start
836 // skip white space from start 793 while (start <= end && predicate(is8Bit() ? characters8()[start] : characters1 6()[start]))
837 while (start <= end && predicate(is8Bit() ? characters8()[start] : character s16()[start])) 794 ++start;
838 ++start; 795
839 796 // only white space
840 // only white space 797 if (start > end)
841 if (start > end) 798 return empty();
842 return empty(); 799
843 800 // skip white space from end
844 // skip white space from end 801 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
845 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]) ) 802 --end;
846 --end; 803
847 804 if (!start && end == m_length - 1)
848 if (!start && end == m_length - 1) 805 return this;
849 return this; 806 if (is8Bit())
850 if (is8Bit()) 807 return create(characters8() + start, end + 1 - start);
851 return create(characters8() + start, end + 1 - start); 808 return create(characters16() + start, end + 1 - start);
852 return create(characters16() + start, end + 1 - start);
853 } 809 }
854 810
855 class UCharPredicate { 811 class UCharPredicate {
856 public: 812 public:
857 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(functi on) { } 813 inline UCharPredicate(CharacterMatchFunctionPtr function)
858 814 : m_function(function) {}
859 inline bool operator()(UChar ch) const 815
860 { 816 inline bool operator()(UChar ch) const {
861 return m_function(ch); 817 return m_function(ch);
862 } 818 }
863 819
864 private: 820 private:
865 const CharacterMatchFunctionPtr m_function; 821 const CharacterMatchFunctionPtr m_function;
866 }; 822 };
867 823
868 class SpaceOrNewlinePredicate { 824 class SpaceOrNewlinePredicate {
869 public: 825 public:
870 inline bool operator()(UChar ch) const 826 inline bool operator()(UChar ch) const {
871 { 827 return isSpaceOrNewline(ch);
872 return isSpaceOrNewline(ch); 828 }
873 }
874 }; 829 };
875 830
876 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() 831 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() {
877 { 832 return stripMatchedCharacters(SpaceOrNewlinePredicate());
878 return stripMatchedCharacters(SpaceOrNewlinePredicate()); 833 }
879 } 834
880 835 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhi teSpace) {
881 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhi teSpace) 836 return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
882 {
883 return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
884 } 837 }
885 838
886 template <typename CharType> 839 template <typename CharType>
887 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType * characters, CharacterMatchFunctionPtr findMatch) 840 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType * characters, CharacterMatchFunctionPtr findMatch) {
888 { 841 const CharType* from = characters;
889 const CharType* from = characters; 842 const CharType* fromend = from + m_length;
890 const CharType* fromend = from + m_length; 843
891 844 // Assume the common case will not remove any characters
892 // Assume the common case will not remove any characters 845 while (from != fromend && !findMatch(*from))
846 ++from;
847 if (from == fromend)
848 return this;
849
850 StringBuffer<CharType> data(m_length);
851 CharType* to = data.characters();
852 unsigned outc = from - characters;
853
854 if (outc)
855 memcpy(to, characters, outc * sizeof(CharType));
856
857 while (true) {
858 while (from != fromend && findMatch(*from))
859 ++from;
893 while (from != fromend && !findMatch(*from)) 860 while (from != fromend && !findMatch(*from))
861 to[outc++] = *from++;
862 if (from == fromend)
863 break;
864 }
865
866 data.shrink(outc);
867
868 return data.release();
869 }
870
871 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr fi ndMatch) {
872 if (is8Bit())
873 return removeCharacters(characters8(), findMatch);
874 return removeCharacters(characters16(), findMatch);
875 }
876
877 template <typename CharType, class UCharPredicate>
878 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UChar Predicate predicate, StripBehavior stripBehavior) {
879 StringBuffer<CharType> data(m_length);
880
881 const CharType* from = getCharacters<CharType>();
882 const CharType* fromend = from + m_length;
883 int outc = 0;
884 bool changedToSpace = false;
885
886 CharType* to = data.characters();
887
888 if (stripBehavior == StripExtraWhiteSpace) {
889 while (true) {
890 while (from != fromend && predicate(*from)) {
891 if (*from != ' ')
892 changedToSpace = true;
894 ++from; 893 ++from;
895 if (from == fromend) 894 }
896 return this; 895 while (from != fromend && !predicate(*from))
897 896 to[outc++] = *from++;
898 StringBuffer<CharType> data(m_length); 897 if (from != fromend)
899 CharType* to = data.characters(); 898 to[outc++] = ' ';
900 unsigned outc = from - characters; 899 else
901 900 break;
902 if (outc) 901 }
903 memcpy(to, characters, outc * sizeof(CharType)); 902
904 903 if (outc > 0 && to[outc - 1] == ' ')
905 while (true) { 904 --outc;
906 while (from != fromend && findMatch(*from)) 905 } else {
907 ++from; 906 for (; from != fromend; ++from) {
908 while (from != fromend && !findMatch(*from)) 907 if (predicate(*from)) {
909 to[outc++] = *from++; 908 if (*from != ' ')
910 if (from == fromend) 909 changedToSpace = true;
911 break; 910 to[outc++] = ' ';
912 } 911 } else {
913 912 to[outc++] = *from;
914 data.shrink(outc); 913 }
915 914 }
916 return data.release(); 915 }
917 } 916
918 917 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
919 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr fi ndMatch) 918 return this;
920 { 919
921 if (is8Bit()) 920 data.shrink(outc);
922 return removeCharacters(characters8(), findMatch); 921
923 return removeCharacters(characters16(), findMatch); 922 return data.release();
924 } 923 }
925 924
926 template <typename CharType, class UCharPredicate> 925 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavio r) {
927 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UChar Predicate predicate, StripBehavior stripBehavior) 926 if (is8Bit())
928 { 927 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePre dicate(), stripBehavior);
929 StringBuffer<CharType> data(m_length); 928 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredi cate(), stripBehavior);
930 929 }
931 const CharType* from = getCharacters<CharType>(); 930
932 const CharType* fromend = from + m_length; 931 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr is WhiteSpace, StripBehavior stripBehavior) {
933 int outc = 0; 932 if (is8Bit())
934 bool changedToSpace = false; 933 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(is WhiteSpace), stripBehavior);
935 934 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWh iteSpace), stripBehavior);
936 CharType* to = data.characters(); 935 }
937 936
938 if (stripBehavior == StripExtraWhiteSpace) { 937 int StringImpl::toIntStrict(bool* ok, int base) {
939 while (true) { 938 if (is8Bit())
940 while (from != fromend && predicate(*from)) { 939 return charactersToIntStrict(characters8(), m_length, ok, base);
941 if (*from != ' ') 940 return charactersToIntStrict(characters16(), m_length, ok, base);
942 changedToSpace = true; 941 }
943 ++from; 942
944 } 943 unsigned StringImpl::toUIntStrict(bool* ok, int base) {
945 while (from != fromend && !predicate(*from)) 944 if (is8Bit())
946 to[outc++] = *from++; 945 return charactersToUIntStrict(characters8(), m_length, ok, base);
947 if (from != fromend) 946 return charactersToUIntStrict(characters16(), m_length, ok, base);
948 to[outc++] = ' '; 947 }
949 else 948
950 break; 949 int64_t StringImpl::toInt64Strict(bool* ok, int base) {
951 } 950 if (is8Bit())
952 951 return charactersToInt64Strict(characters8(), m_length, ok, base);
953 if (outc > 0 && to[outc - 1] == ' ') 952 return charactersToInt64Strict(characters16(), m_length, ok, base);
954 --outc; 953 }
955 } else { 954
956 for (; from != fromend; ++from) { 955 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) {
957 if (predicate(*from)) { 956 if (is8Bit())
958 if (*from != ' ') 957 return charactersToUInt64Strict(characters8(), m_length, ok, base);
959 changedToSpace = true; 958 return charactersToUInt64Strict(characters16(), m_length, ok, base);
960 to[outc++] = ' '; 959 }
961 } else { 960
962 to[outc++] = *from; 961 int StringImpl::toInt(bool* ok) {
963 } 962 if (is8Bit())
964 } 963 return charactersToInt(characters8(), m_length, ok);
965 } 964 return charactersToInt(characters16(), m_length, ok);
966 965 }
967 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) 966
968 return this; 967 unsigned StringImpl::toUInt(bool* ok) {
969 968 if (is8Bit())
970 data.shrink(outc); 969 return charactersToUInt(characters8(), m_length, ok);
971 970 return charactersToUInt(characters16(), m_length, ok);
972 return data.release(); 971 }
973 } 972
974 973 int64_t StringImpl::toInt64(bool* ok) {
975 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavio r) 974 if (is8Bit())
976 { 975 return charactersToInt64(characters8(), m_length, ok);
977 if (is8Bit()) 976 return charactersToInt64(characters16(), m_length, ok);
978 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlin ePredicate(), stripBehavior); 977 }
979 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePre dicate(), stripBehavior); 978
980 } 979 uint64_t StringImpl::toUInt64(bool* ok) {
981 980 if (is8Bit())
982 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr is WhiteSpace, StripBehavior stripBehavior) 981 return charactersToUInt64(characters8(), m_length, ok);
983 { 982 return charactersToUInt64(characters16(), m_length, ok);
984 if (is8Bit()) 983 }
985 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicat e(isWhiteSpace), stripBehavior); 984
986 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(is WhiteSpace), stripBehavior); 985 double StringImpl::toDouble(bool* ok) {
987 } 986 if (is8Bit())
988 987 return charactersToDouble(characters8(), m_length, ok);
989 int StringImpl::toIntStrict(bool* ok, int base) 988 return charactersToDouble(characters16(), m_length, ok);
990 { 989 }
991 if (is8Bit()) 990
992 return charactersToIntStrict(characters8(), m_length, ok, base); 991 float StringImpl::toFloat(bool* ok) {
993 return charactersToIntStrict(characters16(), m_length, ok, base); 992 if (is8Bit())
994 } 993 return charactersToFloat(characters8(), m_length, ok);
995 994 return charactersToFloat(characters16(), m_length, ok);
996 unsigned StringImpl::toUIntStrict(bool* ok, int base)
997 {
998 if (is8Bit())
999 return charactersToUIntStrict(characters8(), m_length, ok, base);
1000 return charactersToUIntStrict(characters16(), m_length, ok, base);
1001 }
1002
1003 int64_t StringImpl::toInt64Strict(bool* ok, int base)
1004 {
1005 if (is8Bit())
1006 return charactersToInt64Strict(characters8(), m_length, ok, base);
1007 return charactersToInt64Strict(characters16(), m_length, ok, base);
1008 }
1009
1010 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
1011 {
1012 if (is8Bit())
1013 return charactersToUInt64Strict(characters8(), m_length, ok, base);
1014 return charactersToUInt64Strict(characters16(), m_length, ok, base);
1015 }
1016
1017 int StringImpl::toInt(bool* ok)
1018 {
1019 if (is8Bit())
1020 return charactersToInt(characters8(), m_length, ok);
1021 return charactersToInt(characters16(), m_length, ok);
1022 }
1023
1024 unsigned StringImpl::toUInt(bool* ok)
1025 {
1026 if (is8Bit())
1027 return charactersToUInt(characters8(), m_length, ok);
1028 return charactersToUInt(characters16(), m_length, ok);
1029 }
1030
1031 int64_t StringImpl::toInt64(bool* ok)
1032 {
1033 if (is8Bit())
1034 return charactersToInt64(characters8(), m_length, ok);
1035 return charactersToInt64(characters16(), m_length, ok);
1036 }
1037
1038 uint64_t StringImpl::toUInt64(bool* ok)
1039 {
1040 if (is8Bit())
1041 return charactersToUInt64(characters8(), m_length, ok);
1042 return charactersToUInt64(characters16(), m_length, ok);
1043 }
1044
1045 double StringImpl::toDouble(bool* ok)
1046 {
1047 if (is8Bit())
1048 return charactersToDouble(characters8(), m_length, ok);
1049 return charactersToDouble(characters16(), m_length, ok);
1050 }
1051
1052 float StringImpl::toFloat(bool* ok)
1053 {
1054 if (is8Bit())
1055 return charactersToFloat(characters8(), m_length, ok);
1056 return charactersToFloat(characters16(), m_length, ok);
1057 } 995 }
1058 996
1059 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt 997 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt
1060 const UChar StringImpl::latin1CaseFoldTable[256] = { 998 const UChar StringImpl::latin1CaseFoldTable[256] = {
1061 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x00 09, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 999 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x00 09, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
1062 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x00 19, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 1000 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x00 19, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
1063 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x00 29, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 1001 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x00 29, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
1064 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x00 39, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 1002 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x00 39, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
1065 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x00 69, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 1003 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x00 69, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
1066 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x00 79, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 1004 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x00 79, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
1067 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x00 69, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 1005 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x00 69, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
1068 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x00 79, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, 1006 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x00 79, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
1069 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x00 89, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 1007 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x00 89, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
1070 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x00 99, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 1008 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x00 99, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
1071 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00 a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 1009 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00 a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
1072 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00 b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 1010 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00 b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
1073 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00 e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 1011 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00 e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
1074 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, 0x00f8, 0x00 f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, 1012 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, 0x00f8, 0x00 f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df,
1075 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00 e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 1013 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00 e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
1076 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00 f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 1014 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00 f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
1077 }; 1015 };
1078 1016
1079 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) 1017 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) {
1080 { 1018 while (length--) {
1081 while (length--) { 1019 if (StringImpl::latin1CaseFoldTable[*a++] != StringImpl::latin1CaseFoldTable [*b++])
1082 if (StringImpl::latin1CaseFoldTable[*a++] != StringImpl::latin1CaseFoldT able[*b++]) 1020 return false;
1083 return false; 1021 }
1084 } 1022 return true;
1085 return true; 1023 }
1086 } 1024
1087 1025 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) {
1088 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) 1026 while (length--) {
1089 { 1027 if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++])
1090 while (length--) { 1028 return false;
1091 if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++]) 1029 }
1092 return false; 1030 return true;
1093 } 1031 }
1094 return true; 1032
1095 } 1033 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) {
1096 1034 if (is8Bit())
1097 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) 1035 return WTF::find(characters8(), m_length, matchFunction, start);
1098 { 1036 return WTF::find(characters16(), m_length, matchFunction, start);
1037 }
1038
1039 size_t StringImpl::find(const LChar* matchString, unsigned index) {
1040 // Check for null or empty string to match against
1041 if (!matchString)
1042 return kNotFound;
1043 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1044 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1045 unsigned matchLength = matchStringLength;
1046 if (!matchLength)
1047 return min(index, length());
1048
1049 // Optimization 1: fast case for strings of length 1.
1050 if (matchLength == 1)
1051 return WTF::find(characters16(), length(), *matchString, index);
1052
1053 // Check index & matchLength are in range.
1054 if (index > length())
1055 return kNotFound;
1056 unsigned searchLength = length() - index;
1057 if (matchLength > searchLength)
1058 return kNotFound;
1059 // delta is the number of additional times to test; delta == 0 means test only once.
1060 unsigned delta = searchLength - matchLength;
1061
1062 const UChar* searchCharacters = characters16() + index;
1063
1064 // Optimization 2: keep a running hash of the strings,
1065 // only call equal if the hashes match.
1066 unsigned searchHash = 0;
1067 unsigned matchHash = 0;
1068 for (unsigned i = 0; i < matchLength; ++i) {
1069 searchHash += searchCharacters[i];
1070 matchHash += matchString[i];
1071 }
1072
1073 unsigned i = 0;
1074 // keep looping until we match
1075 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, ma tchLength)) {
1076 if (i == delta)
1077 return kNotFound;
1078 searchHash += searchCharacters[i + matchLength];
1079 searchHash -= searchCharacters[i];
1080 ++i;
1081 }
1082 return index + i;
1083 }
1084
1085 template <typename CharType>
1086 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchL ength) {
1087 // delta is the number of additional times to test; delta == 0 means test only once.
1088 unsigned delta = searchLength - matchLength;
1089
1090 unsigned i = 0;
1091 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
1092 if (i == delta)
1093 return kNotFound;
1094 ++i;
1095 }
1096 return index + i;
1097 }
1098
1099 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) {
1100 // Check for null or empty string to match against
1101 if (!matchString)
1102 return kNotFound;
1103 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1104 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1105 unsigned matchLength = matchStringLength;
1106 if (!matchLength)
1107 return min(index, length());
1108
1109 // Check index & matchLength are in range.
1110 if (index > length())
1111 return kNotFound;
1112 unsigned searchLength = length() - index;
1113 if (matchLength > searchLength)
1114 return kNotFound;
1115
1116 if (is8Bit())
1117 return findIgnoringCaseInternal(characters8() + index, matchString, index, s earchLength, matchLength);
1118 return findIgnoringCaseInternal(characters16() + index, matchString, index, se archLength, matchLength);
1119 }
1120
1121 template <typename SearchCharacterType, typename MatchCharacterType>
1122 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharac ters, const MatchCharacterType* matchCharacters, unsigned index, unsigned search Length, unsigned matchLength) {
1123 // Optimization: keep a running hash of the strings,
1124 // only call equal() if the hashes match.
1125
1126 // delta is the number of additional times to test; delta == 0 means test only once.
1127 unsigned delta = searchLength - matchLength;
1128
1129 unsigned searchHash = 0;
1130 unsigned matchHash = 0;
1131
1132 for (unsigned i = 0; i < matchLength; ++i) {
1133 searchHash += searchCharacters[i];
1134 matchHash += matchCharacters[i];
1135 }
1136
1137 unsigned i = 0;
1138 // keep looping until we match
1139 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters , matchLength)) {
1140 if (i == delta)
1141 return kNotFound;
1142 searchHash += searchCharacters[i + matchLength];
1143 searchHash -= searchCharacters[i];
1144 ++i;
1145 }
1146 return index + i;
1147 }
1148
1149 size_t StringImpl::find(StringImpl* matchString) {
1150 // Check for null string to match against
1151 if (UNLIKELY(!matchString))
1152 return kNotFound;
1153 unsigned matchLength = matchString->length();
1154
1155 // Optimization 1: fast case for strings of length 1.
1156 if (matchLength == 1) {
1157 if (is8Bit()) {
1158 if (matchString->is8Bit())
1159 return WTF::find(characters8(), length(), matchString->characters8()[0]) ;
1160 return WTF::find(characters8(), length(), matchString->characters16()[0]);
1161 }
1162 if (matchString->is8Bit())
1163 return WTF::find(characters16(), length(), matchString->characters8()[0]);
1164 return WTF::find(characters16(), length(), matchString->characters16()[0]);
1165 }
1166
1167 // Check matchLength is in range.
1168 if (matchLength > length())
1169 return kNotFound;
1170
1171 // Check for empty string to match against
1172 if (UNLIKELY(!matchLength))
1173 return 0;
1174
1175 if (is8Bit()) {
1176 if (matchString->is8Bit())
1177 return findInternal(characters8(), matchString->characters8(), 0, length() , matchLength);
1178 return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength);
1179 }
1180
1181 if (matchString->is8Bit())
1182 return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength);
1183
1184 return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength);
1185 }
1186
1187 size_t StringImpl::find(StringImpl* matchString, unsigned index) {
1188 // Check for null or empty string to match against
1189 if (UNLIKELY(!matchString))
1190 return kNotFound;
1191
1192 unsigned matchLength = matchString->length();
1193
1194 // Optimization 1: fast case for strings of length 1.
1195 if (matchLength == 1) {
1099 if (is8Bit()) 1196 if (is8Bit())
1100 return WTF::find(characters8(), m_length, matchFunction, start); 1197 return WTF::find(characters8(), length(), (*matchString)[0], index);
1101 return WTF::find(characters16(), m_length, matchFunction, start); 1198 return WTF::find(characters16(), length(), (*matchString)[0], index);
1102 } 1199 }
1103 1200
1104 size_t StringImpl::find(const LChar* matchString, unsigned index) 1201 if (UNLIKELY(!matchLength))
1105 { 1202 return min(index, length());
1106 // Check for null or empty string to match against 1203
1107 if (!matchString) 1204 // Check index & matchLength are in range.
1108 return kNotFound; 1205 if (index > length())
1109 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString) ); 1206 return kNotFound;
1110 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); 1207 unsigned searchLength = length() - index;
1111 unsigned matchLength = matchStringLength; 1208 if (matchLength > searchLength)
1112 if (!matchLength) 1209 return kNotFound;
1113 return min(index, length()); 1210
1114 1211 if (is8Bit()) {
1115 // Optimization 1: fast case for strings of length 1. 1212 if (matchString->is8Bit())
1116 if (matchLength == 1) 1213 return findInternal(characters8() + index, matchString->characters8(), ind ex, searchLength, matchLength);
1117 return WTF::find(characters16(), length(), *matchString, index); 1214 return findInternal(characters8() + index, matchString->characters16(), inde x, searchLength, matchLength);
1118 1215 }
1119 // Check index & matchLength are in range. 1216
1120 if (index > length()) 1217 if (matchString->is8Bit())
1121 return kNotFound; 1218 return findInternal(characters16() + index, matchString->characters8(), inde x, searchLength, matchLength);
1122 unsigned searchLength = length() - index; 1219
1123 if (matchLength > searchLength) 1220 return findInternal(characters16() + index, matchString->characters16(), index , searchLength, matchLength);
1124 return kNotFound; 1221 }
1125 // delta is the number of additional times to test; delta == 0 means test on ly once. 1222
1126 unsigned delta = searchLength - matchLength; 1223 template <typename SearchCharacterType, typename MatchCharacterType>
1127 1224 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* sea rchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsign ed searchLength, unsigned matchLength) {
1128 const UChar* searchCharacters = characters16() + index; 1225 // delta is the number of additional times to test; delta == 0 means test only once.
1129 1226 unsigned delta = searchLength - matchLength;
1130 // Optimization 2: keep a running hash of the strings, 1227
1131 // only call equal if the hashes match. 1228 unsigned i = 0;
1132 unsigned searchHash = 0; 1229 // keep looping until we match
1133 unsigned matchHash = 0; 1230 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
1134 for (unsigned i = 0; i < matchLength; ++i) { 1231 if (i == delta)
1135 searchHash += searchCharacters[i]; 1232 return kNotFound;
1136 matchHash += matchString[i]; 1233 ++i;
1137 } 1234 }
1138 1235 return index + i;
1139 unsigned i = 0; 1236 }
1140 // keep looping until we match 1237
1141 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) { 1238 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) {
1142 if (i == delta) 1239 // Check for null or empty string to match against
1143 return kNotFound; 1240 if (!matchString)
1144 searchHash += searchCharacters[i + matchLength]; 1241 return kNotFound;
1145 searchHash -= searchCharacters[i]; 1242 unsigned matchLength = matchString->length();
1146 ++i; 1243 if (!matchLength)
1147 } 1244 return min(index, length());
1148 return index + i; 1245
1149 } 1246 // Check index & matchLength are in range.
1150 1247 if (index > length())
1151 template<typename CharType> 1248 return kNotFound;
1152 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchL ength) 1249 unsigned searchLength = length() - index;
1153 { 1250 if (matchLength > searchLength)
1154 // delta is the number of additional times to test; delta == 0 means test on ly once. 1251 return kNotFound;
1155 unsigned delta = searchLength - matchLength; 1252
1156 1253 if (is8Bit()) {
1157 unsigned i = 0; 1254 if (matchString->is8Bit())
1158 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { 1255 return findIgnoringCaseInner(characters8() + index, matchString->character s8(), index, searchLength, matchLength);
1159 if (i == delta) 1256 return findIgnoringCaseInner(characters8() + index, matchString->characters1 6(), index, searchLength, matchLength);
1160 return kNotFound; 1257 }
1161 ++i; 1258
1162 } 1259 if (matchString->is8Bit())
1163 return index + i; 1260 return findIgnoringCaseInner(characters16() + index, matchString->characters 8(), index, searchLength, matchLength);
1164 } 1261
1165 1262 return findIgnoringCaseInner(characters16() + index, matchString->characters16 (), index, searchLength, matchLength);
1166 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) 1263 }
1167 { 1264
1168 // Check for null or empty string to match against 1265 size_t StringImpl::findNextLineStart(unsigned index) {
1169 if (!matchString) 1266 if (is8Bit())
1170 return kNotFound; 1267 return WTF::findNextLineStart(characters8(), m_length, index);
1171 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString) ); 1268 return WTF::findNextLineStart(characters16(), m_length, index);
1172 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); 1269 }
1173 unsigned matchLength = matchStringLength; 1270
1174 if (!matchLength) 1271 size_t StringImpl::count(LChar c) const {
1175 return min(index, length()); 1272 int count = 0;
1176 1273 if (is8Bit()) {
1177 // Check index & matchLength are in range. 1274 for (size_t i = 0; i < m_length; ++i)
1178 if (index > length()) 1275 count += characters8()[i] == c;
1179 return kNotFound; 1276 } else {
1180 unsigned searchLength = length() - index; 1277 for (size_t i = 0; i < m_length; ++i)
1181 if (matchLength > searchLength) 1278 count += characters16()[i] == c;
1182 return kNotFound; 1279 }
1183 1280 return count;
1281 }
1282
1283 size_t StringImpl::reverseFind(UChar c, unsigned index) {
1284 if (is8Bit())
1285 return WTF::reverseFind(characters8(), m_length, c, index);
1286 return WTF::reverseFind(characters16(), m_length, c, index);
1287 }
1288
1289 template <typename SearchCharacterType, typename MatchCharacterType>
1290 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCh aracters, const MatchCharacterType* matchCharacters, unsigned index, unsigned le ngth, unsigned matchLength) {
1291 // Optimization: keep a running hash of the strings,
1292 // only call equal if the hashes match.
1293
1294 // delta is the number of additional times to test; delta == 0 means test only once.
1295 unsigned delta = min(index, length - matchLength);
1296
1297 unsigned searchHash = 0;
1298 unsigned matchHash = 0;
1299 for (unsigned i = 0; i < matchLength; ++i) {
1300 searchHash += searchCharacters[delta + i];
1301 matchHash += matchCharacters[i];
1302 }
1303
1304 // keep looping until we match
1305 while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharac ters, matchLength)) {
1306 if (!delta)
1307 return kNotFound;
1308 --delta;
1309 searchHash -= searchCharacters[delta + matchLength];
1310 searchHash += searchCharacters[delta];
1311 }
1312 return delta;
1313 }
1314
1315 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) {
1316 // Check for null or empty string to match against
1317 if (!matchString)
1318 return kNotFound;
1319 unsigned matchLength = matchString->length();
1320 unsigned ourLength = length();
1321 if (!matchLength)
1322 return min(index, ourLength);
1323
1324 // Optimization 1: fast case for strings of length 1.
1325 if (matchLength == 1) {
1184 if (is8Bit()) 1326 if (is8Bit())
1185 return findIgnoringCaseInternal(characters8() + index, matchString, inde x, searchLength, matchLength); 1327 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index );
1186 return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength); 1328 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index) ;
1329 }
1330
1331 // Check index & matchLength are in range.
1332 if (matchLength > ourLength)
1333 return kNotFound;
1334
1335 if (is8Bit()) {
1336 if (matchString->is8Bit())
1337 return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1338 return reverseFindInner(characters8(), matchString->characters16(), index, o urLength, matchLength);
1339 }
1340
1341 if (matchString->is8Bit())
1342 return reverseFindInner(characters16(), matchString->characters8(), index, o urLength, matchLength);
1343
1344 return reverseFindInner(characters16(), matchString->characters16(), index, ou rLength, matchLength);
1187 } 1345 }
1188 1346
1189 template <typename SearchCharacterType, typename MatchCharacterType> 1347 template <typename SearchCharacterType, typename MatchCharacterType>
1190 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharac ters, const MatchCharacterType* matchCharacters, unsigned index, unsigned search Length, unsigned matchLength) 1348 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterTy pe* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength) {
1191 { 1349 // delta is the number of additional times to test; delta == 0 means test only once.
1192 // Optimization: keep a running hash of the strings, 1350 unsigned delta = min(index, length - matchLength);
1193 // only call equal() if the hashes match. 1351
1194 1352 // keep looping until we match
1195 // delta is the number of additional times to test; delta == 0 means test on ly once. 1353 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLeng th)) {
1196 unsigned delta = searchLength - matchLength; 1354 if (!delta)
1197 1355 return kNotFound;
1198 unsigned searchHash = 0; 1356 --delta;
1199 unsigned matchHash = 0; 1357 }
1200 1358 return delta;
1201 for (unsigned i = 0; i < matchLength; ++i) { 1359 }
1202 searchHash += searchCharacters[i]; 1360
1203 matchHash += matchCharacters[i]; 1361 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned ind ex) {
1204 } 1362 // Check for null or empty string to match against
1205 1363 if (!matchString)
1206 unsigned i = 0; 1364 return kNotFound;
1207 // keep looping until we match 1365 unsigned matchLength = matchString->length();
1208 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacte rs, matchLength)) { 1366 unsigned ourLength = length();
1209 if (i == delta) 1367 if (!matchLength)
1210 return kNotFound; 1368 return min(index, ourLength);
1211 searchHash += searchCharacters[i + matchLength]; 1369
1212 searchHash -= searchCharacters[i]; 1370 // Check index & matchLength are in range.
1213 ++i; 1371 if (matchLength > ourLength)
1214 } 1372 return kNotFound;
1215 return index + i; 1373
1216 } 1374 if (is8Bit()) {
1217
1218 size_t StringImpl::find(StringImpl* matchString)
1219 {
1220 // Check for null string to match against
1221 if (UNLIKELY(!matchString))
1222 return kNotFound;
1223 unsigned matchLength = matchString->length();
1224
1225 // Optimization 1: fast case for strings of length 1.
1226 if (matchLength == 1) {
1227 if (is8Bit()) {
1228 if (matchString->is8Bit())
1229 return WTF::find(characters8(), length(), matchString->character s8()[0]);
1230 return WTF::find(characters8(), length(), matchString->characters16( )[0]);
1231 }
1232 if (matchString->is8Bit())
1233 return WTF::find(characters16(), length(), matchString->characters8( )[0]);
1234 return WTF::find(characters16(), length(), matchString->characters16()[0 ]);
1235 }
1236
1237 // Check matchLength is in range.
1238 if (matchLength > length())
1239 return kNotFound;
1240
1241 // Check for empty string to match against
1242 if (UNLIKELY(!matchLength))
1243 return 0;
1244
1245 if (is8Bit()) {
1246 if (matchString->is8Bit())
1247 return findInternal(characters8(), matchString->characters8(), 0, le ngth(), matchLength);
1248 return findInternal(characters8(), matchString->characters16(), 0, lengt h(), matchLength);
1249 }
1250
1251 if (matchString->is8Bit()) 1375 if (matchString->is8Bit())
1252 return findInternal(characters16(), matchString->characters8(), 0, lengt h(), matchLength); 1376 return reverseFindIgnoringCaseInner(characters8(), matchString->characters 8(), index, ourLength, matchLength);
1253 1377 return reverseFindIgnoringCaseInner(characters8(), matchString->characters16 (), index, ourLength, matchLength);
1254 return findInternal(characters16(), matchString->characters16(), 0, length() , matchLength); 1378 }
1255 } 1379
1256 1380 if (matchString->is8Bit())
1257 size_t StringImpl::find(StringImpl* matchString, unsigned index) 1381 return reverseFindIgnoringCaseInner(characters16(), matchString->characters8 (), index, ourLength, matchLength);
1258 { 1382
1259 // Check for null or empty string to match against 1383 return reverseFindIgnoringCaseInner(characters16(), matchString->characters16( ), index, ourLength, matchLength);
1260 if (UNLIKELY(!matchString)) 1384 }
1261 return kNotFound; 1385
1262 1386 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned star tOffset, const char* matchString, unsigned matchLength, TextCaseSensitivity case Sensitivity) {
1263 unsigned matchLength = matchString->length(); 1387 ASSERT(stringImpl);
1264 1388 ASSERT(matchLength <= stringImpl->length());
1265 // Optimization 1: fast case for strings of length 1. 1389 ASSERT(startOffset + matchLength <= stringImpl->length());
1266 if (matchLength == 1) { 1390
1267 if (is8Bit()) 1391 if (caseSensitivity == TextCaseSensitive) {
1268 return WTF::find(characters8(), length(), (*matchString)[0], index);
1269 return WTF::find(characters16(), length(), (*matchString)[0], index);
1270 }
1271
1272 if (UNLIKELY(!matchLength))
1273 return min(index, length());
1274
1275 // Check index & matchLength are in range.
1276 if (index > length())
1277 return kNotFound;
1278 unsigned searchLength = length() - index;
1279 if (matchLength > searchLength)
1280 return kNotFound;
1281
1282 if (is8Bit()) {
1283 if (matchString->is8Bit())
1284 return findInternal(characters8() + index, matchString->characters8( ), index, searchLength, matchLength);
1285 return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1286 }
1287
1288 if (matchString->is8Bit())
1289 return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1290
1291 return findInternal(characters16() + index, matchString->characters16(), ind ex, searchLength, matchLength);
1292 }
1293
1294 template <typename SearchCharacterType, typename MatchCharacterType>
1295 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* sea rchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsign ed searchLength, unsigned matchLength)
1296 {
1297 // delta is the number of additional times to test; delta == 0 means test on ly once.
1298 unsigned delta = searchLength - matchLength;
1299
1300 unsigned i = 0;
1301 // keep looping until we match
1302 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength )) {
1303 if (i == delta)
1304 return kNotFound;
1305 ++i;
1306 }
1307 return index + i;
1308 }
1309
1310 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
1311 {
1312 // Check for null or empty string to match against
1313 if (!matchString)
1314 return kNotFound;
1315 unsigned matchLength = matchString->length();
1316 if (!matchLength)
1317 return min(index, length());
1318
1319 // Check index & matchLength are in range.
1320 if (index > length())
1321 return kNotFound;
1322 unsigned searchLength = length() - index;
1323 if (matchLength > searchLength)
1324 return kNotFound;
1325
1326 if (is8Bit()) {
1327 if (matchString->is8Bit())
1328 return findIgnoringCaseInner(characters8() + index, matchString->cha racters8(), index, searchLength, matchLength);
1329 return findIgnoringCaseInner(characters8() + index, matchString->charact ers16(), index, searchLength, matchLength);
1330 }
1331
1332 if (matchString->is8Bit())
1333 return findIgnoringCaseInner(characters16() + index, matchString->charac ters8(), index, searchLength, matchLength);
1334
1335 return findIgnoringCaseInner(characters16() + index, matchString->characters 16(), index, searchLength, matchLength);
1336 }
1337
1338 size_t StringImpl::findNextLineStart(unsigned index)
1339 {
1340 if (is8Bit())
1341 return WTF::findNextLineStart(characters8(), m_length, index);
1342 return WTF::findNextLineStart(characters16(), m_length, index);
1343 }
1344
1345 size_t StringImpl::count(LChar c) const
1346 {
1347 int count = 0;
1348 if (is8Bit()) {
1349 for (size_t i = 0; i < m_length; ++i)
1350 count += characters8()[i] == c;
1351 } else {
1352 for (size_t i = 0; i < m_length; ++i)
1353 count += characters16()[i] == c;
1354 }
1355 return count;
1356 }
1357
1358 size_t StringImpl::reverseFind(UChar c, unsigned index)
1359 {
1360 if (is8Bit())
1361 return WTF::reverseFind(characters8(), m_length, c, index);
1362 return WTF::reverseFind(characters16(), m_length, c, index);
1363 }
1364
1365 template <typename SearchCharacterType, typename MatchCharacterType>
1366 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCh aracters, const MatchCharacterType* matchCharacters, unsigned index, unsigned le ngth, unsigned matchLength)
1367 {
1368 // Optimization: keep a running hash of the strings,
1369 // only call equal if the hashes match.
1370
1371 // delta is the number of additional times to test; delta == 0 means test on ly once.
1372 unsigned delta = min(index, length - matchLength);
1373
1374 unsigned searchHash = 0;
1375 unsigned matchHash = 0;
1376 for (unsigned i = 0; i < matchLength; ++i) {
1377 searchHash += searchCharacters[delta + i];
1378 matchHash += matchCharacters[i];
1379 }
1380
1381 // keep looping until we match
1382 while (searchHash != matchHash || !equal(searchCharacters + delta, matchChar acters, matchLength)) {
1383 if (!delta)
1384 return kNotFound;
1385 --delta;
1386 searchHash -= searchCharacters[delta + matchLength];
1387 searchHash += searchCharacters[delta];
1388 }
1389 return delta;
1390 }
1391
1392 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
1393 {
1394 // Check for null or empty string to match against
1395 if (!matchString)
1396 return kNotFound;
1397 unsigned matchLength = matchString->length();
1398 unsigned ourLength = length();
1399 if (!matchLength)
1400 return min(index, ourLength);
1401
1402 // Optimization 1: fast case for strings of length 1.
1403 if (matchLength == 1) {
1404 if (is8Bit())
1405 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
1406 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], in dex);
1407 }
1408
1409 // Check index & matchLength are in range.
1410 if (matchLength > ourLength)
1411 return kNotFound;
1412
1413 if (is8Bit()) {
1414 if (matchString->is8Bit())
1415 return reverseFindInner(characters8(), matchString->characters8(), i ndex, ourLength, matchLength);
1416 return reverseFindInner(characters8(), matchString->characters16(), inde x, ourLength, matchLength);
1417 }
1418
1419 if (matchString->is8Bit())
1420 return reverseFindInner(characters16(), matchString->characters8(), inde x, ourLength, matchLength);
1421
1422 return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1423 }
1424
1425 template <typename SearchCharacterType, typename MatchCharacterType>
1426 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterTy pe* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1427 {
1428 // delta is the number of additional times to test; delta == 0 means test on ly once.
1429 unsigned delta = min(index, length - matchLength);
1430
1431 // keep looping until we match
1432 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLe ngth)) {
1433 if (!delta)
1434 return kNotFound;
1435 --delta;
1436 }
1437 return delta;
1438 }
1439
1440 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned ind ex)
1441 {
1442 // Check for null or empty string to match against
1443 if (!matchString)
1444 return kNotFound;
1445 unsigned matchLength = matchString->length();
1446 unsigned ourLength = length();
1447 if (!matchLength)
1448 return min(index, ourLength);
1449
1450 // Check index & matchLength are in range.
1451 if (matchLength > ourLength)
1452 return kNotFound;
1453
1454 if (is8Bit()) {
1455 if (matchString->is8Bit())
1456 return reverseFindIgnoringCaseInner(characters8(), matchString->char acters8(), index, ourLength, matchLength);
1457 return reverseFindIgnoringCaseInner(characters8(), matchString->characte rs16(), index, ourLength, matchLength);
1458 }
1459
1460 if (matchString->is8Bit())
1461 return reverseFindIgnoringCaseInner(characters16(), matchString->charact ers8(), index, ourLength, matchLength);
1462
1463 return reverseFindIgnoringCaseInner(characters16(), matchString->characters1 6(), index, ourLength, matchLength);
1464 }
1465
1466 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned star tOffset, const char* matchString, unsigned matchLength, TextCaseSensitivity case Sensitivity)
1467 {
1468 ASSERT(stringImpl);
1469 ASSERT(matchLength <= stringImpl->length());
1470 ASSERT(startOffset + matchLength <= stringImpl->length());
1471
1472 if (caseSensitivity == TextCaseSensitive) {
1473 if (stringImpl->is8Bit())
1474 return equal(stringImpl->characters8() + startOffset, reinterpret_ca st<const LChar*>(matchString), matchLength);
1475 return equal(stringImpl->characters16() + startOffset, reinterpret_cast< const LChar*>(matchString), matchLength);
1476 }
1477 if (stringImpl->is8Bit()) 1392 if (stringImpl->is8Bit())
1478 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinte rpret_cast<const LChar*>(matchString), matchLength); 1393 return equal(stringImpl->characters8() + startOffset, reinterpret_cast<con st LChar*>(matchString), matchLength);
1479 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpr et_cast<const LChar*>(matchString), matchLength); 1394 return equal(stringImpl->characters16() + startOffset, reinterpret_cast<cons t LChar*>(matchString), matchLength);
1480 } 1395 }
1481 1396 if (stringImpl->is8Bit())
1482 bool StringImpl::startsWith(UChar character) const 1397 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpre t_cast<const LChar*>(matchString), matchLength);
1483 { 1398 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret _cast<const LChar*>(matchString), matchLength);
1484 return m_length && (*this)[0] == character; 1399 }
1485 } 1400
1486 1401 bool StringImpl::startsWith(UChar character) const {
1487 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, TextC aseSensitivity caseSensitivity) const 1402 return m_length && (*this)[0] == character;
1488 { 1403 }
1489 ASSERT(matchLength); 1404
1490 if (matchLength > length()) 1405 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, TextC aseSensitivity caseSensitivity) const {
1491 return false; 1406 ASSERT(matchLength);
1492 return equalInner(this, 0, matchString, matchLength, caseSensitivity); 1407 if (matchLength > length())
1493 }
1494
1495 bool StringImpl::endsWith(StringImpl* matchString, TextCaseSensitivity caseSensi tivity)
1496 {
1497 ASSERT(matchString);
1498 if (m_length >= matchString->m_length) {
1499 unsigned start = m_length - matchString->m_length;
1500 if (caseSensitivity == TextCaseSensitive)
1501 return find(matchString, start) == start;
1502 return findIgnoringCase(matchString, start) == start;
1503 }
1504 return false; 1408 return false;
1505 } 1409 return equalInner(this, 0, matchString, matchLength, caseSensitivity);
1506 1410 }
1507 bool StringImpl::endsWith(UChar character) const 1411
1508 { 1412 bool StringImpl::endsWith(StringImpl* matchString, TextCaseSensitivity caseSensi tivity) {
1509 return m_length && (*this)[m_length - 1] == character; 1413 ASSERT(matchString);
1510 } 1414 if (m_length >= matchString->m_length) {
1511 1415 unsigned start = m_length - matchString->m_length;
1512 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, TextCas eSensitivity caseSensitivity) const 1416 if (caseSensitivity == TextCaseSensitive)
1513 { 1417 return find(matchString, start) == start;
1514 ASSERT(matchLength); 1418 return findIgnoringCase(matchString, start) == start;
1515 if (matchLength > length()) 1419 }
1516 return false; 1420 return false;
1517 unsigned startOffset = length() - matchLength; 1421 }
1518 return equalInner(this, startOffset, matchString, matchLength, caseSensitivi ty); 1422
1519 } 1423 bool StringImpl::endsWith(UChar character) const {
1520 1424 return m_length && (*this)[m_length - 1] == character;
1521 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) 1425 }
1522 { 1426
1523 if (oldC == newC) 1427 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, TextCas eSensitivity caseSensitivity) const {
1524 return this; 1428 ASSERT(matchLength);
1525 1429 if (matchLength > length())
1526 if (find(oldC) == kNotFound) 1430 return false;
1527 return this; 1431 unsigned startOffset = length() - matchLength;
1528 1432 return equalInner(this, startOffset, matchString, matchLength, caseSensitivity );
1529 unsigned i; 1433 }
1530 if (is8Bit()) { 1434
1531 if (newC <= 0xff) { 1435 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) {
1532 LChar* data; 1436 if (oldC == newC)
1533 LChar oldChar = static_cast<LChar>(oldC); 1437 return this;
1534 LChar newChar = static_cast<LChar>(newC); 1438
1535 1439 if (find(oldC) == kNotFound)
1536 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 1440 return this;
1537 1441
1538 for (i = 0; i != m_length; ++i) { 1442 unsigned i;
1539 LChar ch = characters8()[i]; 1443 if (is8Bit()) {
1540 if (ch == oldChar) 1444 if (newC <= 0xff) {
1541 ch = newChar; 1445 LChar* data;
1542 data[i] = ch; 1446 LChar oldChar = static_cast<LChar>(oldC);
1543 } 1447 LChar newChar = static_cast<LChar>(newC);
1544 return newImpl.release(); 1448
1545 } 1449 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1546 1450
1547 // There is the possibility we need to up convert from 8 to 16 bit, 1451 for (i = 0; i != m_length; ++i) {
1548 // create a 16 bit string for the result. 1452 LChar ch = characters8()[i];
1549 UChar* data; 1453 if (ch == oldChar)
1550 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 1454 ch = newChar;
1551 1455 data[i] = ch;
1552 for (i = 0; i != m_length; ++i) { 1456 }
1553 UChar ch = characters8()[i]; 1457 return newImpl.release();
1554 if (ch == oldC) 1458 }
1555 ch = newC; 1459
1556 data[i] = ch; 1460 // There is the possibility we need to up convert from 8 to 16 bit,
1557 } 1461 // create a 16 bit string for the result.
1558
1559 return newImpl.release();
1560 }
1561
1562 UChar* data; 1462 UChar* data;
1563 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 1463 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1564 1464
1565 for (i = 0; i != m_length; ++i) { 1465 for (i = 0; i != m_length; ++i) {
1566 UChar ch = characters16()[i]; 1466 UChar ch = characters8()[i];
1567 if (ch == oldC) 1467 if (ch == oldC)
1568 ch = newC; 1468 ch = newC;
1569 data[i] = ch; 1469 data[i] = ch;
1570 } 1470 }
1471
1571 return newImpl.release(); 1472 return newImpl.release();
1572 } 1473 }
1573 1474
1574 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToR eplace, StringImpl* str) 1475 UChar* data;
1575 { 1476 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1576 position = min(position, length()); 1477
1577 lengthToReplace = min(lengthToReplace, length() - position); 1478 for (i = 0; i != m_length; ++i) {
1578 unsigned lengthToInsert = str ? str->length() : 0; 1479 UChar ch = characters16()[i];
1579 if (!lengthToReplace && !lengthToInsert) 1480 if (ch == oldC)
1580 return this; 1481 ch = newC;
1581 1482 data[i] = ch;
1582 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max () - lengthToInsert)); 1483 }
1583 1484 return newImpl.release();
1584 if (is8Bit() && (!str || str->is8Bit())) { 1485 }
1585 LChar* data; 1486
1586 RefPtr<StringImpl> newImpl = 1487 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToR eplace, StringImpl* str) {
1587 createUninitialized(length() - lengthToReplace + lengthToInsert, data); 1488 position = min(position, length());
1588 memcpy(data, characters8(), position * sizeof(LChar)); 1489 lengthToReplace = min(lengthToReplace, length() - position);
1589 if (str) 1490 unsigned lengthToInsert = str ? str->length() : 0;
1590 memcpy(data + position, str->characters8(), lengthToInsert * sizeof( LChar)); 1491 if (!lengthToReplace && !lengthToInsert)
1591 memcpy(data + position + lengthToInsert, characters8() + position + leng thToReplace, 1492 return this;
1592 (length() - position - lengthToReplace) * sizeof(LChar)); 1493
1593 return newImpl.release(); 1494 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert));
1594 } 1495
1595 UChar* data; 1496 if (is8Bit() && (!str || str->is8Bit())) {
1497 LChar* data;
1596 RefPtr<StringImpl> newImpl = 1498 RefPtr<StringImpl> newImpl =
1597 createUninitialized(length() - lengthToReplace + lengthToInsert, data); 1499 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1598 if (is8Bit()) 1500 memcpy(data, characters8(), position * sizeof(LChar));
1599 for (unsigned i = 0; i < position; ++i) 1501 if (str)
1600 data[i] = characters8()[i]; 1502 memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar) );
1503 memcpy(data + position + lengthToInsert, characters8() + position + lengthTo Replace,
1504 (length() - position - lengthToReplace) * sizeof(LChar));
1505 return newImpl.release();
1506 }
1507 UChar* data;
1508 RefPtr<StringImpl> newImpl =
1509 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1510 if (is8Bit())
1511 for (unsigned i = 0; i < position; ++i)
1512 data[i] = characters8()[i];
1513 else
1514 memcpy(data, characters16(), position * sizeof(UChar));
1515 if (str) {
1516 if (str->is8Bit())
1517 for (unsigned i = 0; i < lengthToInsert; ++i)
1518 data[i + position] = str->characters8()[i];
1601 else 1519 else
1602 memcpy(data, characters16(), position * sizeof(UChar)); 1520 memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar ));
1603 if (str) { 1521 }
1604 if (str->is8Bit()) 1522 if (is8Bit()) {
1605 for (unsigned i = 0; i < lengthToInsert; ++i) 1523 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
1606 data[i + position] = str->characters8()[i]; 1524 data[i + position + lengthToInsert] = characters8()[i + position + lengthT oReplace];
1607 else 1525 } else {
1608 memcpy(data + position, str->characters16(), lengthToInsert * sizeof (UChar)); 1526 memcpy(data + position + lengthToInsert, characters16() + position + lengthT oReplace,
1609 } 1527 (length() - position - lengthToReplace) * sizeof(UChar));
1610 if (is8Bit()) { 1528 }
1611 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i) 1529 return newImpl.release();
1612 data[i + position + lengthToInsert] = characters8()[i + position + l engthToReplace]; 1530 }
1613 } else { 1531
1614 memcpy(data + position + lengthToInsert, characters16() + position + len gthToReplace, 1532 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen t) {
1615 (length() - position - lengthToReplace) * sizeof(UChar)); 1533 if (!replacement)
1616 } 1534 return this;
1535
1536 if (replacement->is8Bit())
1537 return replace(pattern, replacement->characters8(), replacement->length());
1538
1539 return replace(pattern, replacement->characters16(), replacement->length());
1540 }
1541
1542 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replaceme nt, unsigned repStrLength) {
1543 ASSERT(replacement);
1544
1545 size_t srcSegmentStart = 0;
1546 unsigned matchCount = 0;
1547
1548 // Count the matches.
1549 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1550 ++matchCount;
1551 ++srcSegmentStart;
1552 }
1553
1554 // If we have 0 matches then we don't have to do any more work.
1555 if (!matchCount)
1556 return this;
1557
1558 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1559
1560 unsigned replaceSize = matchCount * repStrLength;
1561 unsigned newSize = m_length - matchCount;
1562 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1563
1564 newSize += replaceSize;
1565
1566 // Construct the new data.
1567 size_t srcSegmentEnd;
1568 unsigned srcSegmentLength;
1569 srcSegmentStart = 0;
1570 unsigned dstOffset = 0;
1571
1572 if (is8Bit()) {
1573 LChar* data;
1574 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1575
1576 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1577 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1578 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1579 dstOffset += srcSegmentLength;
1580 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
1581 dstOffset += repStrLength;
1582 srcSegmentStart = srcSegmentEnd + 1;
1583 }
1584
1585 srcSegmentLength = m_length - srcSegmentStart;
1586 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1587
1588 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1589
1617 return newImpl.release(); 1590 return newImpl.release();
1618 } 1591 }
1619 1592
1620 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen t) 1593 UChar* data;
1621 { 1594 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1622 if (!replacement) 1595
1623 return this; 1596 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1624 1597 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1625 if (replacement->is8Bit()) 1598 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1626 return replace(pattern, replacement->characters8(), replacement->length( )); 1599
1627 1600 dstOffset += srcSegmentLength;
1628 return replace(pattern, replacement->characters16(), replacement->length()); 1601 for (unsigned i = 0; i < repStrLength; ++i)
1629 } 1602 data[i + dstOffset] = replacement[i];
1630 1603
1631 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replaceme nt, unsigned repStrLength) 1604 dstOffset += repStrLength;
1632 { 1605 srcSegmentStart = srcSegmentEnd + 1;
1633 ASSERT(replacement); 1606 }
1634 1607
1635 size_t srcSegmentStart = 0; 1608 srcSegmentLength = m_length - srcSegmentStart;
1636 unsigned matchCount = 0; 1609 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1637 1610
1638 // Count the matches. 1611 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1639 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { 1612
1640 ++matchCount; 1613 return newImpl.release();
1641 ++srcSegmentStart; 1614 }
1642 } 1615
1643 1616 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replaceme nt, unsigned repStrLength) {
1644 // If we have 0 matches then we don't have to do any more work. 1617 ASSERT(replacement);
1645 if (!matchCount) 1618
1646 return this; 1619 size_t srcSegmentStart = 0;
1647 1620 unsigned matchCount = 0;
1648 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max( ) / repStrLength); 1621
1649 1622 // Count the matches.
1650 unsigned replaceSize = matchCount * repStrLength; 1623 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1651 unsigned newSize = m_length - matchCount; 1624 ++matchCount;
1652 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); 1625 ++srcSegmentStart;
1653 1626 }
1654 newSize += replaceSize; 1627
1655 1628 // If we have 0 matches then we don't have to do any more work.
1656 // Construct the new data. 1629 if (!matchCount)
1657 size_t srcSegmentEnd; 1630 return this;
1658 unsigned srcSegmentLength; 1631
1659 srcSegmentStart = 0; 1632 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1660 unsigned dstOffset = 0; 1633
1661 1634 unsigned replaceSize = matchCount * repStrLength;
1662 if (is8Bit()) { 1635 unsigned newSize = m_length - matchCount;
1663 LChar* data; 1636 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1664 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1637
1665 1638 newSize += replaceSize;
1666 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1639
1667 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1640 // Construct the new data.
1668 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegment Length * sizeof(LChar)); 1641 size_t srcSegmentEnd;
1669 dstOffset += srcSegmentLength; 1642 unsigned srcSegmentLength;
1670 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar)); 1643 srcSegmentStart = 0;
1671 dstOffset += repStrLength; 1644 unsigned dstOffset = 0;
1672 srcSegmentStart = srcSegmentEnd + 1; 1645
1673 } 1646 if (is8Bit()) {
1674
1675 srcSegmentLength = m_length - srcSegmentStart;
1676 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLeng th * sizeof(LChar));
1677
1678 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1679
1680 return newImpl.release();
1681 }
1682
1683 UChar* data; 1647 UChar* data;
1684 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1648 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1685 1649
1686 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1650 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1687 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1651 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1688 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen gth * sizeof(UChar)); 1652 for (unsigned i = 0; i < srcSegmentLength; ++i)
1689 1653 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1690 dstOffset += srcSegmentLength; 1654
1691 for (unsigned i = 0; i < repStrLength; ++i) 1655 dstOffset += srcSegmentLength;
1692 data[i + dstOffset] = replacement[i]; 1656 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1693 1657
1694 dstOffset += repStrLength; 1658 dstOffset += repStrLength;
1695 srcSegmentStart = srcSegmentEnd + 1; 1659 srcSegmentStart = srcSegmentEnd + 1;
1696 } 1660 }
1697 1661
1698 srcSegmentLength = m_length - srcSegmentStart; 1662 srcSegmentLength = m_length - srcSegmentStart;
1663 for (unsigned i = 0; i < srcSegmentLength; ++i)
1664 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1665
1666 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1667
1668 return newImpl.release();
1669 }
1670
1671 UChar* data;
1672 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1673
1674 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1675 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1699 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1676 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1700 1677
1701 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1678 dstOffset += srcSegmentLength;
1702 1679 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1703 return newImpl.release(); 1680
1704 } 1681 dstOffset += repStrLength;
1705 1682 srcSegmentStart = srcSegmentEnd + 1;
1706 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replaceme nt, unsigned repStrLength) 1683 }
1707 { 1684
1708 ASSERT(replacement); 1685 srcSegmentLength = m_length - srcSegmentStart;
1709 1686 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1710 size_t srcSegmentStart = 0; 1687
1711 unsigned matchCount = 0; 1688 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1712 1689
1713 // Count the matches. 1690 return newImpl.release();
1714 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { 1691 }
1715 ++matchCount; 1692
1716 ++srcSegmentStart; 1693 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl acement) {
1717 } 1694 if (!pattern || !replacement)
1718 1695 return this;
1719 // If we have 0 matches then we don't have to do any more work. 1696
1720 if (!matchCount) 1697 unsigned patternLength = pattern->length();
1721 return this; 1698 if (!patternLength)
1722 1699 return this;
1723 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max( ) / repStrLength); 1700
1724 1701 unsigned repStrLength = replacement->length();
1725 unsigned replaceSize = matchCount * repStrLength; 1702 size_t srcSegmentStart = 0;
1726 unsigned newSize = m_length - matchCount; 1703 unsigned matchCount = 0;
1727 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); 1704
1728 1705 // Count the matches.
1729 newSize += replaceSize; 1706 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1730 1707 ++matchCount;
1731 // Construct the new data. 1708 srcSegmentStart += patternLength;
1732 size_t srcSegmentEnd; 1709 }
1733 unsigned srcSegmentLength; 1710
1734 srcSegmentStart = 0; 1711 // If we have 0 matches, we don't have to do any more work
1735 unsigned dstOffset = 0; 1712 if (!matchCount)
1736 1713 return this;
1737 if (is8Bit()) { 1714
1738 UChar* data; 1715 unsigned newSize = m_length - matchCount * patternLength;
1739 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1716 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1740 1717
1741 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1718 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repS trLength));
1742 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1719
1743 for (unsigned i = 0; i < srcSegmentLength; ++i) 1720 newSize += matchCount * repStrLength;
1744 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 1721
1745 1722 // Construct the new data
1746 dstOffset += srcSegmentLength; 1723 size_t srcSegmentEnd;
1747 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); 1724 unsigned srcSegmentLength;
1748 1725 srcSegmentStart = 0;
1749 dstOffset += repStrLength; 1726 unsigned dstOffset = 0;
1750 srcSegmentStart = srcSegmentEnd + 1; 1727 bool srcIs8Bit = is8Bit();
1751 } 1728 bool replacementIs8Bit = replacement->is8Bit();
1752 1729
1753 srcSegmentLength = m_length - srcSegmentStart; 1730 // There are 4 cases:
1754 for (unsigned i = 0; i < srcSegmentLength; ++i) 1731 // 1. This and replacement are both 8 bit.
1755 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 1732 // 2. This and replacement are both 16 bit.
1756 1733 // 3. This is 8 bit and replacement is 16 bit.
1757 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1734 // 4. This is 16 bit and replacement is 8 bit.
1758 1735 if (srcIs8Bit && replacementIs8Bit) {
1759 return newImpl.release(); 1736 // Case 1
1760 } 1737 LChar* data;
1761
1762 UChar* data;
1763 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1764
1765 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1766 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1767 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen gth * sizeof(UChar));
1768
1769 dstOffset += srcSegmentLength;
1770 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1771
1772 dstOffset += repStrLength;
1773 srcSegmentStart = srcSegmentEnd + 1;
1774 }
1775
1776 srcSegmentLength = m_length - srcSegmentStart;
1777 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1778
1779 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1780
1781 return newImpl.release();
1782 }
1783
1784 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl acement)
1785 {
1786 if (!pattern || !replacement)
1787 return this;
1788
1789 unsigned patternLength = pattern->length();
1790 if (!patternLength)
1791 return this;
1792
1793 unsigned repStrLength = replacement->length();
1794 size_t srcSegmentStart = 0;
1795 unsigned matchCount = 0;
1796
1797 // Count the matches.
1798 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1799 ++matchCount;
1800 srcSegmentStart += patternLength;
1801 }
1802
1803 // If we have 0 matches, we don't have to do any more work
1804 if (!matchCount)
1805 return this;
1806
1807 unsigned newSize = m_length - matchCount * patternLength;
1808 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max( ) / repStrLength);
1809
1810 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * re pStrLength));
1811
1812 newSize += matchCount * repStrLength;
1813
1814
1815 // Construct the new data
1816 size_t srcSegmentEnd;
1817 unsigned srcSegmentLength;
1818 srcSegmentStart = 0;
1819 unsigned dstOffset = 0;
1820 bool srcIs8Bit = is8Bit();
1821 bool replacementIs8Bit = replacement->is8Bit();
1822
1823 // There are 4 cases:
1824 // 1. This and replacement are both 8 bit.
1825 // 2. This and replacement are both 16 bit.
1826 // 3. This is 8 bit and replacement is 16 bit.
1827 // 4. This is 16 bit and replacement is 8 bit.
1828 if (srcIs8Bit && replacementIs8Bit) {
1829 // Case 1
1830 LChar* data;
1831 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1832 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1833 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1834 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegment Length * sizeof(LChar));
1835 dstOffset += srcSegmentLength;
1836 memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar));
1837 dstOffset += repStrLength;
1838 srcSegmentStart = srcSegmentEnd + patternLength;
1839 }
1840
1841 srcSegmentLength = m_length - srcSegmentStart;
1842 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLeng th * sizeof(LChar));
1843
1844 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1845
1846 return newImpl.release();
1847 }
1848
1849 UChar* data;
1850 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1738 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1851 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1739 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1852 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1740 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1853 if (srcIs8Bit) { 1741 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1854 // Case 3. 1742 dstOffset += srcSegmentLength;
1855 for (unsigned i = 0; i < srcSegmentLength; ++i) 1743 memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof (LChar));
1856 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 1744 dstOffset += repStrLength;
1857 } else { 1745 srcSegmentStart = srcSegmentEnd + patternLength;
1858 // Case 2 & 4.
1859 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmen tLength * sizeof(UChar));
1860 }
1861 dstOffset += srcSegmentLength;
1862 if (replacementIs8Bit) {
1863 // Cases 2 & 3.
1864 for (unsigned i = 0; i < repStrLength; ++i)
1865 data[i + dstOffset] = replacement->characters8()[i];
1866 } else {
1867 // Case 4
1868 memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar));
1869 }
1870 dstOffset += repStrLength;
1871 srcSegmentStart = srcSegmentEnd + patternLength;
1872 } 1746 }
1873 1747
1874 srcSegmentLength = m_length - srcSegmentStart; 1748 srcSegmentLength = m_length - srcSegmentStart;
1749 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1750
1751 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1752
1753 return newImpl.release();
1754 }
1755
1756 UChar* data;
1757 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1758 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1759 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1875 if (srcIs8Bit) { 1760 if (srcIs8Bit) {
1876 // Case 3. 1761 // Case 3.
1877 for (unsigned i = 0; i < srcSegmentLength; ++i) 1762 for (unsigned i = 0; i < srcSegmentLength; ++i)
1878 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 1763 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1879 } else { 1764 } else {
1880 // Cases 2 & 4. 1765 // Case 2 & 4.
1881 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen gth * sizeof(UChar)); 1766 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLengt h * sizeof(UChar));
1882 } 1767 }
1883 1768 dstOffset += srcSegmentLength;
1884 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1769 if (replacementIs8Bit) {
1885 1770 // Cases 2 & 3.
1886 return newImpl.release(); 1771 for (unsigned i = 0; i < repStrLength; ++i)
1887 } 1772 data[i + dstOffset] = replacement->characters8()[i];
1888 1773 } else {
1889 PassRefPtr<StringImpl> StringImpl::upconvertedString() 1774 // Case 4
1890 { 1775 memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeo f(UChar));
1891 if (is8Bit()) 1776 }
1892 return String::make16BitFrom8BitSource(characters8(), m_length).releaseI mpl(); 1777 dstOffset += repStrLength;
1893 return this; 1778 srcSegmentStart = srcSegmentEnd + patternLength;
1894 } 1779 }
1895 1780
1896 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b) 1781 srcSegmentLength = m_length - srcSegmentStart;
1897 { 1782 if (srcIs8Bit) {
1898 unsigned aLength = a->length(); 1783 // Case 3.
1899 unsigned bLength = b->length(); 1784 for (unsigned i = 0; i < srcSegmentLength; ++i)
1900 if (aLength != bLength) 1785 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1786 } else {
1787 // Cases 2 & 4.
1788 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1789 }
1790
1791 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1792
1793 return newImpl.release();
1794 }
1795
1796 PassRefPtr<StringImpl> StringImpl::upconvertedString() {
1797 if (is8Bit())
1798 return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl( );
1799 return this;
1800 }
1801
1802 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b) {
1803 unsigned aLength = a->length();
1804 unsigned bLength = b->length();
1805 if (aLength != bLength)
1806 return false;
1807
1808 if (a->is8Bit()) {
1809 if (b->is8Bit())
1810 return equal(a->characters8(), b->characters8(), aLength);
1811
1812 return equal(a->characters8(), b->characters16(), aLength);
1813 }
1814
1815 if (b->is8Bit())
1816 return equal(a->characters16(), b->characters8(), aLength);
1817
1818 return equal(a->characters16(), b->characters16(), aLength);
1819 }
1820
1821 bool equal(const StringImpl* a, const StringImpl* b) {
1822 if (a == b)
1823 return true;
1824 if (!a || !b)
1825 return false;
1826 if (a->isAtomic() && b->isAtomic())
1827 return false;
1828
1829 return stringImplContentEqual(a, b);
1830 }
1831
1832 template <typename CharType>
1833 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned lengt h) {
1834 if (!a)
1835 return !b;
1836 if (!b)
1837 return false;
1838
1839 if (a->length() != length)
1840 return false;
1841 if (a->is8Bit())
1842 return equal(a->characters8(), b, length);
1843 return equal(a->characters16(), b, length);
1844 }
1845
1846 bool equal(const StringImpl* a, const LChar* b, unsigned length) {
1847 return equalInternal(a, b, length);
1848 }
1849
1850 bool equal(const StringImpl* a, const UChar* b, unsigned length) {
1851 return equalInternal(a, b, length);
1852 }
1853
1854 bool equal(const StringImpl* a, const LChar* b) {
1855 if (!a)
1856 return !b;
1857 if (!b)
1858 return !a;
1859
1860 unsigned length = a->length();
1861
1862 if (a->is8Bit()) {
1863 const LChar* aPtr = a->characters8();
1864 for (unsigned i = 0; i != length; ++i) {
1865 LChar bc = b[i];
1866 LChar ac = aPtr[i];
1867 if (!bc)
1901 return false; 1868 return false;
1902 1869 if (ac != bc)
1903 if (a->is8Bit()) {
1904 if (b->is8Bit())
1905 return equal(a->characters8(), b->characters8(), aLength);
1906
1907 return equal(a->characters8(), b->characters16(), aLength);
1908 }
1909
1910 if (b->is8Bit())
1911 return equal(a->characters16(), b->characters8(), aLength);
1912
1913 return equal(a->characters16(), b->characters16(), aLength);
1914 }
1915
1916 bool equal(const StringImpl* a, const StringImpl* b)
1917 {
1918 if (a == b)
1919 return true;
1920 if (!a || !b)
1921 return false; 1870 return false;
1922 if (a->isAtomic() && b->isAtomic()) 1871 }
1872
1873 return !b[length];
1874 }
1875
1876 const UChar* aPtr = a->characters16();
1877 for (unsigned i = 0; i != length; ++i) {
1878 LChar bc = b[i];
1879 if (!bc)
1880 return false;
1881 if (aPtr[i] != bc)
1882 return false;
1883 }
1884
1885 return !b[length];
1886 }
1887
1888 bool equalNonNull(const StringImpl* a, const StringImpl* b) {
1889 ASSERT(a && b);
1890 if (a == b)
1891 return true;
1892
1893 return stringImplContentEqual(a, b);
1894 }
1895
1896 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b) {
1897 if (a == b)
1898 return true;
1899 if (!a || !b)
1900 return false;
1901
1902 return CaseFoldingHash::equal(a, b);
1903 }
1904
1905 bool equalIgnoringCase(const StringImpl* a, const LChar* b) {
1906 if (!a)
1907 return !b;
1908 if (!b)
1909 return !a;
1910
1911 unsigned length = a->length();
1912
1913 // Do a faster loop for the case where all the characters are ASCII.
1914 UChar ored = 0;
1915 bool equal = true;
1916 if (a->is8Bit()) {
1917 const LChar* as = a->characters8();
1918 for (unsigned i = 0; i != length; ++i) {
1919 LChar bc = b[i];
1920 if (!bc)
1923 return false; 1921 return false;
1924 1922 UChar ac = as[i];
1925 return stringImplContentEqual(a, b); 1923 ored |= ac;
1926 } 1924 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
1927
1928 template <typename CharType>
1929 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned lengt h)
1930 {
1931 if (!a)
1932 return !b;
1933 if (!b)
1934 return false;
1935
1936 if (a->length() != length)
1937 return false;
1938 if (a->is8Bit())
1939 return equal(a->characters8(), b, length);
1940 return equal(a->characters16(), b, length);
1941 }
1942
1943 bool equal(const StringImpl* a, const LChar* b, unsigned length)
1944 {
1945 return equalInternal(a, b, length);
1946 }
1947
1948 bool equal(const StringImpl* a, const UChar* b, unsigned length)
1949 {
1950 return equalInternal(a, b, length);
1951 }
1952
1953 bool equal(const StringImpl* a, const LChar* b)
1954 {
1955 if (!a)
1956 return !b;
1957 if (!b)
1958 return !a;
1959
1960 unsigned length = a->length();
1961
1962 if (a->is8Bit()) {
1963 const LChar* aPtr = a->characters8();
1964 for (unsigned i = 0; i != length; ++i) {
1965 LChar bc = b[i];
1966 LChar ac = aPtr[i];
1967 if (!bc)
1968 return false;
1969 if (ac != bc)
1970 return false;
1971 }
1972
1973 return !b[length];
1974 }
1975
1976 const UChar* aPtr = a->characters16();
1977 for (unsigned i = 0; i != length; ++i) {
1978 LChar bc = b[i];
1979 if (!bc)
1980 return false;
1981 if (aPtr[i] != bc)
1982 return false;
1983 }
1984
1985 return !b[length];
1986 }
1987
1988 bool equalNonNull(const StringImpl* a, const StringImpl* b)
1989 {
1990 ASSERT(a && b);
1991 if (a == b)
1992 return true;
1993
1994 return stringImplContentEqual(a, b);
1995 }
1996
1997 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
1998 {
1999 if (a == b)
2000 return true;
2001 if (!a || !b)
2002 return false;
2003
2004 return CaseFoldingHash::equal(a, b);
2005 }
2006
2007 bool equalIgnoringCase(const StringImpl* a, const LChar* b)
2008 {
2009 if (!a)
2010 return !b;
2011 if (!b)
2012 return !a;
2013
2014 unsigned length = a->length();
2015
2016 // Do a faster loop for the case where all the characters are ASCII.
2017 UChar ored = 0;
2018 bool equal = true;
2019 if (a->is8Bit()) {
2020 const LChar* as = a->characters8();
2021 for (unsigned i = 0; i != length; ++i) {
2022 LChar bc = b[i];
2023 if (!bc)
2024 return false;
2025 UChar ac = as[i];
2026 ored |= ac;
2027 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2028 }
2029
2030 // Do a slower implementation for cases that include non-ASCII character s.
2031 if (ored & ~0x7F) {
2032 equal = true;
2033 for (unsigned i = 0; i != length; ++i)
2034 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2035 }
2036
2037 return equal && !b[length];
2038 }
2039
2040 const UChar* as = a->characters16();
2041 for (unsigned i = 0; i != length; ++i) {
2042 LChar bc = b[i];
2043 if (!bc)
2044 return false;
2045 UChar ac = as[i];
2046 ored |= ac;
2047 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2048 } 1925 }
2049 1926
2050 // Do a slower implementation for cases that include non-ASCII characters. 1927 // Do a slower implementation for cases that include non-ASCII characters.
2051 if (ored & ~0x7F) { 1928 if (ored & ~0x7F) {
2052 equal = true; 1929 equal = true;
2053 for (unsigned i = 0; i != length; ++i) { 1930 for (unsigned i = 0; i != length; ++i)
2054 equal = equal && (foldCase(as[i]) == foldCase(b[i])); 1931 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2055 }
2056 } 1932 }
2057 1933
2058 return equal && !b[length]; 1934 return equal && !b[length];
2059 } 1935 }
2060 1936
2061 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) 1937 const UChar* as = a->characters16();
2062 { 1938 for (unsigned i = 0; i != length; ++i) {
2063 ASSERT(a && b); 1939 LChar bc = b[i];
2064 if (a == b) 1940 if (!bc)
2065 return true; 1941 return false;
2066 1942 UChar ac = as[i];
2067 unsigned length = a->length(); 1943 ored |= ac;
2068 if (length != b->length()) 1944 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2069 return false; 1945 }
2070 1946
2071 if (a->is8Bit()) { 1947 // Do a slower implementation for cases that include non-ASCII characters.
2072 if (b->is8Bit()) 1948 if (ored & ~0x7F) {
2073 return equalIgnoringCase(a->characters8(), b->characters8(), length) ; 1949 equal = true;
2074 1950 for (unsigned i = 0; i != length; ++i) {
2075 return equalIgnoringCase(b->characters16(), a->characters8(), length); 1951 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2076 } 1952 }
2077 1953 }
1954
1955 return equal && !b[length];
1956 }
1957
1958 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) {
1959 ASSERT(a && b);
1960 if (a == b)
1961 return true;
1962
1963 unsigned length = a->length();
1964 if (length != b->length())
1965 return false;
1966
1967 if (a->is8Bit()) {
2078 if (b->is8Bit()) 1968 if (b->is8Bit())
2079 return equalIgnoringCase(a->characters16(), b->characters8(), length); 1969 return equalIgnoringCase(a->characters8(), b->characters8(), length);
2080 1970
2081 return equalIgnoringCase(a->characters16(), b->characters16(), length); 1971 return equalIgnoringCase(b->characters16(), a->characters8(), length);
2082 } 1972 }
2083 1973
2084 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) 1974 if (b->is8Bit())
2085 { 1975 return equalIgnoringCase(a->characters16(), b->characters8(), length);
2086 if (!a && b && !b->length()) 1976
2087 return true; 1977 return equalIgnoringCase(a->characters16(), b->characters16(), length);
2088 if (!b && a && !a->length()) 1978 }
2089 return true; 1979
2090 return equal(a, b); 1980 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) {
2091 } 1981 if (!a && b && !b->length())
2092 1982 return true;
2093 size_t StringImpl::sizeInBytes() const 1983 if (!b && a && !a->length())
2094 { 1984 return true;
2095 size_t size = length(); 1985 return equal(a, b);
2096 if (!is8Bit()) 1986 }
2097 size *= 2; 1987
2098 return size + sizeof(*this); 1988 size_t StringImpl::sizeInBytes() const {
2099 } 1989 size_t size = length();
2100 1990 if (!is8Bit())
2101 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) 1991 size *= 2;
2102 { 1992 return size + sizeof(*this);
2103 if (!localeIdentifier.isNull()) { 1993 }
2104 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(l ocaleIdentifier, "az")) { 1994
2105 if (c == 'i') 1995 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) {
2106 return latinCapitalLetterIWithDotAbove; 1996 if (!localeIdentifier.isNull()) {
2107 if (c == latinSmallLetterDotlessI) 1997 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(local eIdentifier, "az")) {
2108 return 'I'; 1998 if (c == 'i')
2109 } else if (localeIdMatchesLang(localeIdentifier, "lt")) { 1999 return latinCapitalLetterIWithDotAbove;
2110 // TODO(rob.buis) implement upper-casing rules for lt 2000 if (c == latinSmallLetterDotlessI)
2111 // like in StringImpl::upper(locale). 2001 return 'I';
2112 } 2002 } else if (localeIdMatchesLang(localeIdentifier, "lt")) {
2113 } 2003 // TODO(rob.buis) implement upper-casing rules for lt
2114 2004 // like in StringImpl::upper(locale).
2115 return toUpper(c); 2005 }
2116 } 2006 }
2117 2007
2118 } // namespace WTF 2008 return toUpper(c);
2009 }
2010
2011 } // namespace WTF
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/text/StringImpl.h ('k') | third_party/WebKit/Source/wtf/text/StringImplCF.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698