OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | |
3 * (C) 1999 Antti Koivisto (koivisto@kde.org) | |
4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) | |
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All | |
6 * rights reserved. | |
7 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) | |
8 * | |
9 * This library is free software; you can redistribute it and/or | |
10 * modify it under the terms of the GNU Library General Public | |
11 * License as published by the Free Software Foundation; either | |
12 * version 2 of the License, or (at your option) any later version. | |
13 * | |
14 * This library is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Library General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Library General Public License | |
20 * along with this library; see the file COPYING.LIB. If not, write to | |
21 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
22 * Boston, MA 02110-1301, USA. | |
23 * | |
24 */ | |
25 | |
26 #include "wtf/text/StringImpl.h" | |
27 | |
28 #include "wtf/DynamicAnnotations.h" | |
29 #include "wtf/LeakAnnotations.h" | |
30 #include "wtf/PtrUtil.h" | |
31 #include "wtf/StaticConstructors.h" | |
32 #include "wtf/StdLibExtras.h" | |
33 #include "wtf/allocator/Partitions.h" | |
34 #include "wtf/text/AtomicString.h" | |
35 #include "wtf/text/AtomicStringTable.h" | |
36 #include "wtf/text/CString.h" | |
37 #include "wtf/text/CharacterNames.h" | |
38 #include "wtf/text/StringBuffer.h" | |
39 #include "wtf/text/StringHash.h" | |
40 #include "wtf/text/StringToNumber.h" | |
41 #include <algorithm> | |
42 #include <memory> | |
43 | |
44 #ifdef STRING_STATS | |
45 #include "wtf/DataLog.h" | |
46 #include "wtf/HashMap.h" | |
47 #include "wtf/HashSet.h" | |
48 #include "wtf/RefCounted.h" | |
49 #include "wtf/ThreadingPrimitives.h" | |
50 #include <unistd.h> | |
51 #endif | |
52 | |
53 using namespace std; | |
54 | |
55 namespace WTF { | |
56 | |
57 using namespace Unicode; | |
58 | |
59 // As of Jan 2017, StringImpl needs 2 * sizeof(int) + 29 bits of data, and | |
60 // sizeof(ThreadRestrictionVerifier) is 16 bytes. Thus, in DCHECK mode the | |
61 // class may be padded to 32 bytes. | |
62 #if DCHECK_IS_ON() | |
63 static_assert(sizeof(StringImpl) <= 8 * sizeof(int), | |
64 "StringImpl should stay small"); | |
65 #else | |
66 static_assert(sizeof(StringImpl) <= 3 * sizeof(int), | |
67 "StringImpl should stay small"); | |
68 #endif | |
69 | |
70 #ifdef STRING_STATS | |
71 | |
72 static Mutex& statsMutex() { | |
73 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); | |
74 return mutex; | |
75 } | |
76 | |
77 static HashSet<void*>& liveStrings() { | |
78 // Notice that we can't use HashSet<StringImpl*> because then HashSet would | |
79 // dedup identical strings. | |
80 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ()); | |
81 return strings; | |
82 } | |
83 | |
84 void addStringForStats(StringImpl* string) { | |
85 MutexLocker locker(statsMutex()); | |
86 liveStrings().add(string); | |
87 } | |
88 | |
89 void removeStringForStats(StringImpl* string) { | |
90 MutexLocker locker(statsMutex()); | |
91 liveStrings().remove(string); | |
92 } | |
93 | |
94 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) { | |
95 const unsigned kMaxSnippetLength = 64; | |
96 snippet.clear(); | |
97 | |
98 size_t expectedLength = std::min(string->length(), kMaxSnippetLength); | |
99 if (expectedLength == kMaxSnippetLength) | |
100 expectedLength += 3; // For the "...". | |
101 ++expectedLength; // For the terminating '\0'. | |
102 snippet.reserveCapacity(expectedLength); | |
103 | |
104 size_t i; | |
105 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) { | |
106 UChar c = (*string)[i]; | |
107 if (isASCIIPrintable(c)) | |
108 snippet.append(c); | |
109 else | |
110 snippet.append('?'); | |
111 } | |
112 if (i < string->length()) { | |
113 snippet.append('.'); | |
114 snippet.append('.'); | |
115 snippet.append('.'); | |
116 } | |
117 snippet.append('\0'); | |
118 } | |
119 | |
120 static bool isUnnecessarilyWide(const StringImpl* string) { | |
121 if (string->is8Bit()) | |
122 return false; | |
123 UChar c = 0; | |
124 for (unsigned i = 0; i < string->length(); ++i) | |
125 c |= (*string)[i] >> 8; | |
126 return !c; | |
127 } | |
128 | |
129 class PerStringStats : public RefCounted<PerStringStats> { | |
130 public: | |
131 static PassRefPtr<PerStringStats> create() { | |
132 return adoptRef(new PerStringStats); | |
133 } | |
134 | |
135 void add(const StringImpl* string) { | |
136 ++m_numberOfCopies; | |
137 if (!m_length) { | |
138 m_length = string->length(); | |
139 fillWithSnippet(string, m_snippet); | |
140 } | |
141 if (string->isAtomic()) | |
142 ++m_numberOfAtomicCopies; | |
143 if (isUnnecessarilyWide(string)) | |
144 m_unnecessarilyWide = true; | |
145 } | |
146 | |
147 size_t totalCharacters() const { return m_numberOfCopies * m_length; } | |
148 | |
149 void print() { | |
150 const char* status = "ok"; | |
151 if (m_unnecessarilyWide) | |
152 status = "16"; | |
153 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, | |
154 m_length, m_snippet.data()); | |
155 } | |
156 | |
157 bool m_unnecessarilyWide; | |
158 unsigned m_numberOfCopies; | |
159 unsigned m_length; | |
160 unsigned m_numberOfAtomicCopies; | |
161 Vector<char> m_snippet; | |
162 | |
163 private: | |
164 PerStringStats() | |
165 : m_unnecessarilyWide(false), | |
166 m_numberOfCopies(0), | |
167 m_length(0), | |
168 m_numberOfAtomicCopies(0) {} | |
169 }; | |
170 | |
171 bool operator<(const RefPtr<PerStringStats>& a, | |
172 const RefPtr<PerStringStats>& b) { | |
173 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide) | |
174 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide; | |
175 if (a->totalCharacters() != b->totalCharacters()) | |
176 return a->totalCharacters() < b->totalCharacters(); | |
177 if (a->m_numberOfCopies != b->m_numberOfCopies) | |
178 return a->m_numberOfCopies < b->m_numberOfCopies; | |
179 if (a->m_length != b->m_length) | |
180 return a->m_length < b->m_length; | |
181 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies; | |
182 } | |
183 | |
184 static void printLiveStringStats(void*) { | |
185 MutexLocker locker(statsMutex()); | |
186 HashSet<void*>& strings = liveStrings(); | |
187 | |
188 HashMap<StringImpl*, RefPtr<PerStringStats>> stats; | |
189 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); | |
190 ++iter) { | |
191 StringImpl* string = static_cast<StringImpl*>(*iter); | |
192 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry = | |
193 stats.find(string); | |
194 RefPtr<PerStringStats> value = | |
195 entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) | |
196 : entry->value; | |
197 value->add(string); | |
198 stats.set(string, value.release()); | |
199 } | |
200 | |
201 Vector<RefPtr<PerStringStats>> all; | |
202 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter = | |
203 stats.begin(); | |
204 iter != stats.end(); ++iter) | |
205 all.append(iter->value); | |
206 | |
207 std::sort(all.begin(), all.end()); | |
208 std::reverse(all.begin(), all.end()); | |
209 for (size_t i = 0; i < 20 && i < all.size(); ++i) | |
210 all[i]->print(); | |
211 } | |
212 | |
213 StringStats StringImpl::m_stringStats; | |
214 | |
215 unsigned StringStats::s_stringRemovesTillPrintStats = | |
216 StringStats::s_printStringStatsFrequency; | |
217 | |
218 void StringStats::removeString(StringImpl* string) { | |
219 unsigned length = string->length(); | |
220 --m_totalNumberStrings; | |
221 | |
222 if (string->is8Bit()) { | |
223 --m_number8BitStrings; | |
224 m_total8BitData -= length; | |
225 } else { | |
226 --m_number16BitStrings; | |
227 m_total16BitData -= length; | |
228 } | |
229 | |
230 if (!--s_stringRemovesTillPrintStats) { | |
231 s_stringRemovesTillPrintStats = s_printStringStatsFrequency; | |
232 printStats(); | |
233 } | |
234 } | |
235 | |
236 void StringStats::printStats() { | |
237 dataLogF("String stats for process id %d:\n", getpid()); | |
238 | |
239 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData; | |
240 double percent8Bit = | |
241 m_totalNumberStrings | |
242 ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings | |
243 : 0.0; | |
244 double average8bitLength = | |
245 m_number8BitStrings | |
246 ? (double)m_total8BitData / (double)m_number8BitStrings | |
247 : 0.0; | |
248 dataLogF( | |
249 "%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length " | |
250 "%6.1f\n", | |
251 m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, | |
252 average8bitLength); | |
253 | |
254 double percent16Bit = | |
255 m_totalNumberStrings | |
256 ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings | |
257 : 0.0; | |
258 double average16bitLength = | |
259 m_number16BitStrings | |
260 ? (double)m_total16BitData / (double)m_number16BitStrings | |
261 : 0.0; | |
262 dataLogF( | |
263 "%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length " | |
264 "%6.1f\n", | |
265 m_number16BitStrings, percent16Bit, m_total16BitData, | |
266 m_total16BitData * 2, average16bitLength); | |
267 | |
268 double averageLength = | |
269 m_totalNumberStrings | |
270 ? (double)totalNumberCharacters / (double)m_totalNumberStrings | |
271 : 0.0; | |
272 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2; | |
273 dataLogF( | |
274 "%8u Total %12llu chars %12llu bytes avg length " | |
275 "%6.1f\n", | |
276 m_totalNumberStrings, totalNumberCharacters, totalDataBytes, | |
277 averageLength); | |
278 unsigned long long totalSavedBytes = m_total8BitData; | |
279 double percentSavings = totalSavedBytes | |
280 ? ((double)totalSavedBytes * 100) / | |
281 (double)(totalDataBytes + totalSavedBytes) | |
282 : 0.0; | |
283 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, | |
284 percentSavings); | |
285 | |
286 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl); | |
287 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100; | |
288 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, | |
289 overheadPercent); | |
290 | |
291 internal::callOnMainThread(&printLiveStringStats, nullptr); | |
292 } | |
293 #endif | |
294 | |
295 void* StringImpl::operator new(size_t size) { | |
296 DCHECK_EQ(size, sizeof(StringImpl)); | |
297 return Partitions::bufferMalloc(size, "WTF::StringImpl"); | |
298 } | |
299 | |
300 void StringImpl::operator delete(void* ptr) { | |
301 Partitions::bufferFree(ptr); | |
302 } | |
303 | |
304 inline StringImpl::~StringImpl() { | |
305 DCHECK(!isStatic()); | |
306 | |
307 STRING_STATS_REMOVE_STRING(this); | |
308 | |
309 if (isAtomic()) | |
310 AtomicStringTable::instance().remove(this); | |
311 } | |
312 | |
313 void StringImpl::destroyIfNotStatic() const { | |
314 if (!isStatic()) | |
315 delete this; | |
316 } | |
317 | |
318 void StringImpl::updateContainsOnlyASCII() const { | |
319 m_containsOnlyASCII = is8Bit() | |
320 ? charactersAreAllASCII(characters8(), length()) | |
321 : charactersAreAllASCII(characters16(), length()); | |
322 m_needsASCIICheck = false; | |
323 } | |
324 | |
325 bool StringImpl::isSafeToSendToAnotherThread() const { | |
326 if (isStatic()) | |
327 return true; | |
328 // AtomicStrings are not safe to send between threads as ~StringImpl() | |
329 // will try to remove them from the wrong AtomicStringTable. | |
330 if (isAtomic()) | |
331 return false; | |
332 if (hasOneRef()) | |
333 return true; | |
334 return false; | |
335 } | |
336 | |
337 #if DCHECK_IS_ON() | |
338 std::string StringImpl::asciiForDebugging() const { | |
339 CString ascii = String(isolatedCopy()->substring(0, 128)).ascii(); | |
340 return std::string(ascii.data(), ascii.length()); | |
341 } | |
342 #endif | |
343 | |
344 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, | |
345 LChar*& data) { | |
346 if (!length) { | |
347 data = 0; | |
348 return empty; | |
349 } | |
350 | |
351 // Allocate a single buffer large enough to contain the StringImpl | |
352 // struct as well as the data which it contains. This removes one | |
353 // heap allocation from this call. | |
354 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc( | |
355 allocationSize<LChar>(length), "WTF::StringImpl")); | |
356 | |
357 data = reinterpret_cast<LChar*>(string + 1); | |
358 return adoptRef(new (string) StringImpl(length, Force8BitConstructor)); | |
359 } | |
360 | |
361 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, | |
362 UChar*& data) { | |
363 if (!length) { | |
364 data = 0; | |
365 return empty; | |
366 } | |
367 | |
368 // Allocate a single buffer large enough to contain the StringImpl | |
369 // struct as well as the data which it contains. This removes one | |
370 // heap allocation from this call. | |
371 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc( | |
372 allocationSize<UChar>(length), "WTF::StringImpl")); | |
373 | |
374 data = reinterpret_cast<UChar*>(string + 1); | |
375 return adoptRef(new (string) StringImpl(length)); | |
376 } | |
377 | |
378 static StaticStringsTable& staticStrings() { | |
379 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ()); | |
380 return staticStrings; | |
381 } | |
382 | |
383 #if DCHECK_IS_ON() | |
384 static bool s_allowCreationOfStaticStrings = true; | |
385 #endif | |
386 | |
387 const StaticStringsTable& StringImpl::allStaticStrings() { | |
388 return staticStrings(); | |
389 } | |
390 | |
391 void StringImpl::freezeStaticStrings() { | |
392 DCHECK(isMainThread()); | |
393 | |
394 #if DCHECK_IS_ON() | |
395 s_allowCreationOfStaticStrings = false; | |
396 #endif | |
397 } | |
398 | |
399 unsigned StringImpl::m_highestStaticStringLength = 0; | |
400 | |
401 DEFINE_GLOBAL(StringImpl, globalEmpty); | |
402 DEFINE_GLOBAL(StringImpl, globalEmpty16Bit); | |
403 // Callers need the global empty strings to be non-const. | |
404 StringImpl* StringImpl::empty = const_cast<StringImpl*>(&globalEmpty); | |
405 StringImpl* StringImpl::empty16Bit = const_cast<StringImpl*>(&globalEmpty16Bit); | |
406 void StringImpl::initStatics() { | |
407 new ((void*)empty) StringImpl(ConstructEmptyString); | |
408 new ((void*)empty16Bit) StringImpl(ConstructEmptyString16Bit); | |
409 WTF_ANNOTATE_BENIGN_RACE(StringImpl::empty, | |
410 "Benign race on the reference counter of a static " | |
411 "string created by StringImpl::empty"); | |
412 WTF_ANNOTATE_BENIGN_RACE(StringImpl::empty16Bit, | |
413 "Benign race on the reference counter of a static " | |
414 "string created by StringImpl::empty16Bit"); | |
415 } | |
416 | |
417 StringImpl* StringImpl::createStatic(const char* string, | |
418 unsigned length, | |
419 unsigned hash) { | |
420 #if DCHECK_IS_ON() | |
421 DCHECK(s_allowCreationOfStaticStrings); | |
422 #endif | |
423 DCHECK(string); | |
424 DCHECK(length); | |
425 | |
426 StaticStringsTable::const_iterator it = staticStrings().find(hash); | |
427 if (it != staticStrings().end()) { | |
428 DCHECK(!memcmp(string, it->value + 1, length * sizeof(LChar))); | |
429 return it->value; | |
430 } | |
431 | |
432 // Allocate a single buffer large enough to contain the StringImpl | |
433 // struct as well as the data which it contains. This removes one | |
434 // heap allocation from this call. | |
435 RELEASE_ASSERT(length <= | |
436 ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / | |
437 sizeof(LChar))); | |
438 size_t size = sizeof(StringImpl) + length * sizeof(LChar); | |
439 | |
440 WTF_INTERNAL_LEAK_SANITIZER_DISABLED_SCOPE; | |
441 StringImpl* impl = static_cast<StringImpl*>( | |
442 Partitions::bufferMalloc(size, "WTF::StringImpl")); | |
443 | |
444 LChar* data = reinterpret_cast<LChar*>(impl + 1); | |
445 impl = new (impl) StringImpl(length, hash, StaticString); | |
446 memcpy(data, string, length * sizeof(LChar)); | |
447 #if DCHECK_IS_ON() | |
448 impl->assertHashIsCorrect(); | |
449 #endif | |
450 | |
451 DCHECK(isMainThread()); | |
452 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length); | |
453 staticStrings().insert(hash, impl); | |
454 WTF_ANNOTATE_BENIGN_RACE(impl, | |
455 "Benign race on the reference counter of a static " | |
456 "string created by StringImpl::createStatic"); | |
457 | |
458 return impl; | |
459 } | |
460 | |
461 void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) { | |
462 #if DCHECK_IS_ON() | |
463 DCHECK(s_allowCreationOfStaticStrings); | |
464 #endif | |
465 staticStrings().reserveCapacityForSize(size); | |
466 } | |
467 | |
468 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, | |
469 unsigned length) { | |
470 if (!characters || !length) | |
471 return empty; | |
472 | |
473 UChar* data; | |
474 RefPtr<StringImpl> string = createUninitialized(length, data); | |
475 memcpy(data, characters, length * sizeof(UChar)); | |
476 return string.release(); | |
477 } | |
478 | |
479 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, | |
480 unsigned length) { | |
481 if (!characters || !length) | |
482 return empty; | |
483 | |
484 LChar* data; | |
485 RefPtr<StringImpl> string = createUninitialized(length, data); | |
486 memcpy(data, characters, length * sizeof(LChar)); | |
487 return string.release(); | |
488 } | |
489 | |
490 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, | |
491 unsigned length) { | |
492 if (!characters || !length) | |
493 return empty; | |
494 | |
495 LChar* data; | |
496 RefPtr<StringImpl> string = createUninitialized(length, data); | |
497 | |
498 for (size_t i = 0; i < length; ++i) { | |
499 if (characters[i] & 0xff00) | |
500 return create(characters, length); | |
501 data[i] = static_cast<LChar>(characters[i]); | |
502 } | |
503 | |
504 return string.release(); | |
505 } | |
506 | |
507 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) { | |
508 if (!string) | |
509 return empty; | |
510 size_t length = strlen(reinterpret_cast<const char*>(string)); | |
511 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); | |
512 return create(string, length); | |
513 } | |
514 | |
515 bool StringImpl::containsOnlyWhitespace() { | |
516 // FIXME: The definition of whitespace here includes a number of characters | |
517 // that are not whitespace from the point of view of LayoutText; I wonder if | |
518 // that's a problem in practice. | |
519 if (is8Bit()) { | |
520 for (unsigned i = 0; i < m_length; ++i) { | |
521 UChar c = characters8()[i]; | |
522 if (!isASCIISpace(c)) | |
523 return false; | |
524 } | |
525 | |
526 return true; | |
527 } | |
528 | |
529 for (unsigned i = 0; i < m_length; ++i) { | |
530 UChar c = characters16()[i]; | |
531 if (!isASCIISpace(c)) | |
532 return false; | |
533 } | |
534 return true; | |
535 } | |
536 | |
537 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, | |
538 unsigned length) const { | |
539 if (start >= m_length) | |
540 return empty; | |
541 unsigned maxLength = m_length - start; | |
542 if (length >= maxLength) { | |
543 // PassRefPtr has trouble dealing with const arguments. It should be updated | |
544 // so this const_cast is not necessary. | |
545 if (!start) | |
546 return const_cast<StringImpl*>(this); | |
547 length = maxLength; | |
548 } | |
549 if (is8Bit()) | |
550 return create(characters8() + start, length); | |
551 | |
552 return create(characters16() + start, length); | |
553 } | |
554 | |
555 UChar32 StringImpl::characterStartingAt(unsigned i) { | |
556 if (is8Bit()) | |
557 return characters8()[i]; | |
558 if (U16_IS_SINGLE(characters16()[i])) | |
559 return characters16()[i]; | |
560 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && | |
561 U16_IS_TRAIL(characters16()[i + 1])) | |
562 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]); | |
563 return 0; | |
564 } | |
565 | |
566 unsigned StringImpl::copyTo(UChar* buffer, | |
567 unsigned start, | |
568 unsigned maxLength) const { | |
569 unsigned numberOfCharactersToCopy = std::min(length() - start, maxLength); | |
570 if (!numberOfCharactersToCopy) | |
571 return 0; | |
572 if (is8Bit()) | |
573 copyChars(buffer, characters8() + start, numberOfCharactersToCopy); | |
574 else | |
575 copyChars(buffer, characters16() + start, numberOfCharactersToCopy); | |
576 return numberOfCharactersToCopy; | |
577 } | |
578 | |
579 PassRefPtr<StringImpl> StringImpl::lowerASCII() { | |
580 // First scan the string for uppercase and non-ASCII characters: | |
581 if (is8Bit()) { | |
582 unsigned firstIndexToBeLowered = m_length; | |
583 for (unsigned i = 0; i < m_length; ++i) { | |
584 LChar ch = characters8()[i]; | |
585 if (isASCIIUpper(ch)) { | |
586 firstIndexToBeLowered = i; | |
587 break; | |
588 } | |
589 } | |
590 | |
591 // Nothing to do if the string is all ASCII with no uppercase. | |
592 if (firstIndexToBeLowered == m_length) { | |
593 return this; | |
594 } | |
595 | |
596 LChar* data8; | |
597 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | |
598 memcpy(data8, characters8(), firstIndexToBeLowered); | |
599 | |
600 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) { | |
601 LChar ch = characters8()[i]; | |
602 data8[i] = isASCIIUpper(ch) ? toASCIILower(ch) : ch; | |
603 } | |
604 return newImpl.release(); | |
605 } | |
606 bool noUpper = true; | |
607 UChar ored = 0; | |
608 | |
609 const UChar* end = characters16() + m_length; | |
610 for (const UChar* chp = characters16(); chp != end; ++chp) { | |
611 if (isASCIIUpper(*chp)) | |
612 noUpper = false; | |
613 ored |= *chp; | |
614 } | |
615 // Nothing to do if the string is all ASCII with no uppercase. | |
616 if (noUpper && !(ored & ~0x7F)) | |
617 return this; | |
618 | |
619 RELEASE_ASSERT(m_length <= | |
620 static_cast<unsigned>(numeric_limits<unsigned>::max())); | |
621 unsigned length = m_length; | |
622 | |
623 UChar* data16; | |
624 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
625 | |
626 for (unsigned i = 0; i < length; ++i) { | |
627 UChar c = characters16()[i]; | |
628 data16[i] = isASCIIUpper(c) ? toASCIILower(c) : c; | |
629 } | |
630 return newImpl.release(); | |
631 } | |
632 | |
633 PassRefPtr<StringImpl> StringImpl::lower() { | |
634 // Note: This is a hot function in the Dromaeo benchmark, specifically the | |
635 // no-op code path up through the first 'return' statement. | |
636 | |
637 // First scan the string for uppercase and non-ASCII characters: | |
638 if (is8Bit()) { | |
639 unsigned firstIndexToBeLowered = m_length; | |
640 for (unsigned i = 0; i < m_length; ++i) { | |
641 LChar ch = characters8()[i]; | |
642 if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) { | |
643 firstIndexToBeLowered = i; | |
644 break; | |
645 } | |
646 } | |
647 | |
648 // Nothing to do if the string is all ASCII with no uppercase. | |
649 if (firstIndexToBeLowered == m_length) | |
650 return this; | |
651 | |
652 LChar* data8; | |
653 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | |
654 memcpy(data8, characters8(), firstIndexToBeLowered); | |
655 | |
656 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) { | |
657 LChar ch = characters8()[i]; | |
658 data8[i] = UNLIKELY(ch & ~0x7F) ? static_cast<LChar>(Unicode::toLower(ch)) | |
659 : toASCIILower(ch); | |
660 } | |
661 | |
662 return newImpl.release(); | |
663 } | |
664 | |
665 bool noUpper = true; | |
666 UChar ored = 0; | |
667 | |
668 const UChar* end = characters16() + m_length; | |
669 for (const UChar* chp = characters16(); chp != end; ++chp) { | |
670 if (UNLIKELY(isASCIIUpper(*chp))) | |
671 noUpper = false; | |
672 ored |= *chp; | |
673 } | |
674 // Nothing to do if the string is all ASCII with no uppercase. | |
675 if (noUpper && !(ored & ~0x7F)) | |
676 return this; | |
677 | |
678 RELEASE_ASSERT(m_length <= | |
679 static_cast<unsigned>(numeric_limits<int32_t>::max())); | |
680 int32_t length = m_length; | |
681 | |
682 if (!(ored & ~0x7F)) { | |
683 UChar* data16; | |
684 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
685 | |
686 for (int32_t i = 0; i < length; ++i) { | |
687 UChar c = characters16()[i]; | |
688 data16[i] = toASCIILower(c); | |
689 } | |
690 return newImpl.release(); | |
691 } | |
692 | |
693 // Do a slower implementation for cases that include non-ASCII characters. | |
694 UChar* data16; | |
695 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
696 | |
697 bool error; | |
698 int32_t realLength = | |
699 Unicode::toLower(data16, length, characters16(), m_length, &error); | |
700 if (!error && realLength == length) | |
701 return newImpl.release(); | |
702 | |
703 newImpl = createUninitialized(realLength, data16); | |
704 Unicode::toLower(data16, realLength, characters16(), m_length, &error); | |
705 if (error) | |
706 return this; | |
707 return newImpl.release(); | |
708 } | |
709 | |
710 PassRefPtr<StringImpl> StringImpl::upper() { | |
711 // This function could be optimized for no-op cases the way lower() is, | |
712 // but in empirical testing, few actual calls to upper() are no-ops, so | |
713 // it wouldn't be worth the extra time for pre-scanning. | |
714 | |
715 RELEASE_ASSERT(m_length <= | |
716 static_cast<unsigned>(numeric_limits<int32_t>::max())); | |
717 int32_t length = m_length; | |
718 | |
719 if (is8Bit()) { | |
720 LChar* data8; | |
721 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | |
722 | |
723 // Do a faster loop for the case where all the characters are ASCII. | |
724 LChar ored = 0; | |
725 for (int i = 0; i < length; ++i) { | |
726 LChar c = characters8()[i]; | |
727 ored |= c; | |
728 data8[i] = toASCIIUpper(c); | |
729 } | |
730 if (!(ored & ~0x7F)) | |
731 return newImpl.release(); | |
732 | |
733 // Do a slower implementation for cases that include non-ASCII Latin-1 | |
734 // characters. | |
735 int numberSharpSCharacters = 0; | |
736 | |
737 // There are two special cases. | |
738 // 1. latin-1 characters when converted to upper case are 16 bit | |
739 // characters. | |
740 // 2. Lower case sharp-S converts to "SS" (two characters) | |
741 for (int32_t i = 0; i < length; ++i) { | |
742 LChar c = characters8()[i]; | |
743 if (UNLIKELY(c == smallLetterSharpSCharacter)) | |
744 ++numberSharpSCharacters; | |
745 UChar upper = static_cast<UChar>(Unicode::toUpper(c)); | |
746 if (UNLIKELY(upper > 0xff)) { | |
747 // Since this upper-cased character does not fit in an 8-bit string, we | |
748 // need to take the 16-bit path. | |
749 goto upconvert; | |
750 } | |
751 data8[i] = static_cast<LChar>(upper); | |
752 } | |
753 | |
754 if (!numberSharpSCharacters) | |
755 return newImpl.release(); | |
756 | |
757 // We have numberSSCharacters sharp-s characters, but none of the other | |
758 // special characters. | |
759 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); | |
760 | |
761 LChar* dest = data8; | |
762 | |
763 for (int32_t i = 0; i < length; ++i) { | |
764 LChar c = characters8()[i]; | |
765 if (c == smallLetterSharpSCharacter) { | |
766 *dest++ = 'S'; | |
767 *dest++ = 'S'; | |
768 } else { | |
769 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); | |
770 } | |
771 } | |
772 | |
773 return newImpl.release(); | |
774 } | |
775 | |
776 upconvert: | |
777 RefPtr<StringImpl> upconverted = upconvertedString(); | |
778 const UChar* source16 = upconverted->characters16(); | |
779 | |
780 UChar* data16; | |
781 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
782 | |
783 // Do a faster loop for the case where all the characters are ASCII. | |
784 UChar ored = 0; | |
785 for (int i = 0; i < length; ++i) { | |
786 UChar c = source16[i]; | |
787 ored |= c; | |
788 data16[i] = toASCIIUpper(c); | |
789 } | |
790 if (!(ored & ~0x7F)) | |
791 return newImpl.release(); | |
792 | |
793 // Do a slower implementation for cases that include non-ASCII characters. | |
794 bool error; | |
795 int32_t realLength = | |
796 Unicode::toUpper(data16, length, source16, m_length, &error); | |
797 if (!error && realLength == length) | |
798 return newImpl; | |
799 newImpl = createUninitialized(realLength, data16); | |
800 Unicode::toUpper(data16, realLength, source16, m_length, &error); | |
801 if (error) | |
802 return this; | |
803 return newImpl.release(); | |
804 } | |
805 | |
806 PassRefPtr<StringImpl> StringImpl::upperASCII() { | |
807 if (is8Bit()) { | |
808 LChar* data8; | |
809 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | |
810 | |
811 for (unsigned i = 0; i < m_length; ++i) { | |
812 LChar c = characters8()[i]; | |
813 data8[i] = isASCIILower(c) ? toASCIIUpper(c) : c; | |
814 } | |
815 return newImpl.release(); | |
816 } | |
817 | |
818 UChar* data16; | |
819 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
820 | |
821 for (unsigned i = 0; i < m_length; ++i) { | |
822 UChar c = characters16()[i]; | |
823 data16[i] = isASCIILower(c) ? toASCIIUpper(c) : c; | |
824 } | |
825 return newImpl.release(); | |
826 } | |
827 | |
828 static inline bool localeIdMatchesLang(const AtomicString& localeId, | |
829 const StringView& lang) { | |
830 RELEASE_ASSERT(lang.length() >= 2 && lang.length() <= 3); | |
831 if (!localeId.impl() || !localeId.impl()->startsWithIgnoringCase(lang)) | |
832 return false; | |
833 if (localeId.impl()->length() == lang.length()) | |
834 return true; | |
835 const UChar maybeDelimiter = (*localeId.impl())[lang.length()]; | |
836 return maybeDelimiter == '-' || maybeDelimiter == '_' || | |
837 maybeDelimiter == '@'; | |
838 } | |
839 | |
840 typedef int32_t (*icuCaseConverter)(UChar*, | |
841 int32_t, | |
842 const UChar*, | |
843 int32_t, | |
844 const char*, | |
845 UErrorCode*); | |
846 | |
847 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, | |
848 size_t length, | |
849 icuCaseConverter converter, | |
850 const char* locale, | |
851 StringImpl* originalString) { | |
852 UChar* data16; | |
853 size_t targetLength = length; | |
854 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16); | |
855 do { | |
856 UErrorCode status = U_ZERO_ERROR; | |
857 targetLength = | |
858 converter(data16, targetLength, source16, length, locale, &status); | |
859 if (U_SUCCESS(status)) { | |
860 if (length > 0) | |
861 return output->substring(0, targetLength); | |
862 return output.release(); | |
863 } | |
864 if (status != U_BUFFER_OVERFLOW_ERROR) | |
865 return originalString; | |
866 // Expand the buffer. | |
867 output = StringImpl::createUninitialized(targetLength, data16); | |
868 } while (true); | |
869 } | |
870 | |
871 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) { | |
872 // Use the more optimized code path most of the time. | |
873 // Only Turkic (tr and az) languages and Lithuanian requires | |
874 // locale-specific lowercasing rules. Even though CLDR has el-Lower, | |
875 // it's identical to the locale-agnostic lowercasing. Context-dependent | |
876 // handling of Greek capital sigma is built into the common lowercasing | |
877 // function in ICU. | |
878 const char* localeForConversion = 0; | |
879 if (localeIdMatchesLang(localeIdentifier, "tr") || | |
880 localeIdMatchesLang(localeIdentifier, "az")) | |
881 localeForConversion = "tr"; | |
882 else if (localeIdMatchesLang(localeIdentifier, "lt")) | |
883 localeForConversion = "lt"; | |
884 else | |
885 return lower(); | |
886 | |
887 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) | |
888 CRASH(); | |
889 int length = m_length; | |
890 | |
891 RefPtr<StringImpl> upconverted = upconvertedString(); | |
892 const UChar* source16 = upconverted->characters16(); | |
893 return caseConvert(source16, length, u_strToLower, localeForConversion, this); | |
894 } | |
895 | |
896 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) { | |
897 // Use the more-optimized code path most of the time. | |
898 // Only Turkic (tr and az) languages, Greek and Lithuanian require | |
899 // locale-specific uppercasing rules. | |
900 const char* localeForConversion = 0; | |
901 if (localeIdMatchesLang(localeIdentifier, "tr") || | |
902 localeIdMatchesLang(localeIdentifier, "az")) | |
903 localeForConversion = "tr"; | |
904 else if (localeIdMatchesLang(localeIdentifier, "el")) | |
905 localeForConversion = "el"; | |
906 else if (localeIdMatchesLang(localeIdentifier, "lt")) | |
907 localeForConversion = "lt"; | |
908 else | |
909 return upper(); | |
910 | |
911 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) | |
912 CRASH(); | |
913 int length = m_length; | |
914 | |
915 RefPtr<StringImpl> upconverted = upconvertedString(); | |
916 const UChar* source16 = upconverted->characters16(); | |
917 | |
918 return caseConvert(source16, length, u_strToUpper, localeForConversion, this); | |
919 } | |
920 | |
921 PassRefPtr<StringImpl> StringImpl::fill(UChar character) { | |
922 if (!(character & ~0x7F)) { | |
923 LChar* data; | |
924 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
925 for (unsigned i = 0; i < m_length; ++i) | |
926 data[i] = static_cast<LChar>(character); | |
927 return newImpl.release(); | |
928 } | |
929 UChar* data; | |
930 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
931 for (unsigned i = 0; i < m_length; ++i) | |
932 data[i] = character; | |
933 return newImpl.release(); | |
934 } | |
935 | |
936 PassRefPtr<StringImpl> StringImpl::foldCase() { | |
937 RELEASE_ASSERT(m_length <= | |
938 static_cast<unsigned>(numeric_limits<int32_t>::max())); | |
939 int32_t length = m_length; | |
940 | |
941 if (is8Bit()) { | |
942 // Do a faster loop for the case where all the characters are ASCII. | |
943 LChar* data; | |
944 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
945 LChar ored = 0; | |
946 | |
947 for (int32_t i = 0; i < length; ++i) { | |
948 LChar c = characters8()[i]; | |
949 data[i] = toASCIILower(c); | |
950 ored |= c; | |
951 } | |
952 | |
953 if (!(ored & ~0x7F)) | |
954 return newImpl.release(); | |
955 | |
956 // Do a slower implementation for cases that include non-ASCII Latin-1 | |
957 // characters. | |
958 for (int32_t i = 0; i < length; ++i) | |
959 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); | |
960 | |
961 return newImpl.release(); | |
962 } | |
963 | |
964 // Do a faster loop for the case where all the characters are ASCII. | |
965 UChar* data; | |
966 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
967 UChar ored = 0; | |
968 for (int32_t i = 0; i < length; ++i) { | |
969 UChar c = characters16()[i]; | |
970 ored |= c; | |
971 data[i] = toASCIILower(c); | |
972 } | |
973 if (!(ored & ~0x7F)) | |
974 return newImpl.release(); | |
975 | |
976 // Do a slower implementation for cases that include non-ASCII characters. | |
977 bool error; | |
978 int32_t realLength = | |
979 Unicode::foldCase(data, length, characters16(), m_length, &error); | |
980 if (!error && realLength == length) | |
981 return newImpl.release(); | |
982 newImpl = createUninitialized(realLength, data); | |
983 Unicode::foldCase(data, realLength, characters16(), m_length, &error); | |
984 if (error) | |
985 return this; | |
986 return newImpl.release(); | |
987 } | |
988 | |
989 PassRefPtr<StringImpl> StringImpl::truncate(unsigned length) { | |
990 if (length >= m_length) | |
991 return this; | |
992 if (is8Bit()) | |
993 return create(characters8(), length); | |
994 return create(characters16(), length); | |
995 } | |
996 | |
997 template <class UCharPredicate> | |
998 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters( | |
999 UCharPredicate predicate) { | |
1000 if (!m_length) | |
1001 return empty; | |
1002 | |
1003 unsigned start = 0; | |
1004 unsigned end = m_length - 1; | |
1005 | |
1006 // skip white space from start | |
1007 while (start <= end && | |
1008 predicate(is8Bit() ? characters8()[start] : characters16()[start])) | |
1009 ++start; | |
1010 | |
1011 // only white space | |
1012 if (start > end) | |
1013 return empty; | |
1014 | |
1015 // skip white space from end | |
1016 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end])) | |
1017 --end; | |
1018 | |
1019 if (!start && end == m_length - 1) | |
1020 return this; | |
1021 if (is8Bit()) | |
1022 return create(characters8() + start, end + 1 - start); | |
1023 return create(characters16() + start, end + 1 - start); | |
1024 } | |
1025 | |
1026 class UCharPredicate final { | |
1027 STACK_ALLOCATED(); | |
1028 | |
1029 public: | |
1030 inline UCharPredicate(CharacterMatchFunctionPtr function) | |
1031 : m_function(function) {} | |
1032 | |
1033 inline bool operator()(UChar ch) const { return m_function(ch); } | |
1034 | |
1035 private: | |
1036 const CharacterMatchFunctionPtr m_function; | |
1037 }; | |
1038 | |
1039 class SpaceOrNewlinePredicate final { | |
1040 STACK_ALLOCATED(); | |
1041 | |
1042 public: | |
1043 inline bool operator()(UChar ch) const { return isSpaceOrNewline(ch); } | |
1044 }; | |
1045 | |
1046 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() { | |
1047 return stripMatchedCharacters(SpaceOrNewlinePredicate()); | |
1048 } | |
1049 | |
1050 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace( | |
1051 IsWhiteSpaceFunctionPtr isWhiteSpace) { | |
1052 return stripMatchedCharacters(UCharPredicate(isWhiteSpace)); | |
1053 } | |
1054 | |
1055 template <typename CharType> | |
1056 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters( | |
1057 const CharType* characters, | |
1058 CharacterMatchFunctionPtr findMatch) { | |
1059 const CharType* from = characters; | |
1060 const CharType* fromend = from + m_length; | |
1061 | |
1062 // Assume the common case will not remove any characters | |
1063 while (from != fromend && !findMatch(*from)) | |
1064 ++from; | |
1065 if (from == fromend) | |
1066 return this; | |
1067 | |
1068 StringBuffer<CharType> data(m_length); | |
1069 CharType* to = data.characters(); | |
1070 unsigned outc = from - characters; | |
1071 | |
1072 if (outc) | |
1073 memcpy(to, characters, outc * sizeof(CharType)); | |
1074 | |
1075 while (true) { | |
1076 while (from != fromend && findMatch(*from)) | |
1077 ++from; | |
1078 while (from != fromend && !findMatch(*from)) | |
1079 to[outc++] = *from++; | |
1080 if (from == fromend) | |
1081 break; | |
1082 } | |
1083 | |
1084 data.shrink(outc); | |
1085 | |
1086 return data.release(); | |
1087 } | |
1088 | |
1089 PassRefPtr<StringImpl> StringImpl::removeCharacters( | |
1090 CharacterMatchFunctionPtr findMatch) { | |
1091 if (is8Bit()) | |
1092 return removeCharacters(characters8(), findMatch); | |
1093 return removeCharacters(characters16(), findMatch); | |
1094 } | |
1095 | |
1096 PassRefPtr<StringImpl> StringImpl::remove(unsigned start, | |
1097 unsigned lengthToRemove) { | |
1098 if (lengthToRemove <= 0) | |
1099 return this; | |
1100 if (start >= m_length) | |
1101 return this; | |
1102 | |
1103 lengthToRemove = std::min(m_length - start, lengthToRemove); | |
1104 unsigned removedEnd = start + lengthToRemove; | |
1105 | |
1106 if (is8Bit()) { | |
1107 StringBuffer<LChar> buffer(m_length - lengthToRemove); | |
1108 copyChars(buffer.characters(), characters8(), start); | |
1109 copyChars(buffer.characters() + start, characters8() + removedEnd, | |
1110 m_length - removedEnd); | |
1111 return buffer.release(); | |
1112 } | |
1113 StringBuffer<UChar> buffer(m_length - lengthToRemove); | |
1114 copyChars(buffer.characters(), characters16(), start); | |
1115 copyChars(buffer.characters() + start, characters16() + removedEnd, | |
1116 m_length - removedEnd); | |
1117 return buffer.release(); | |
1118 } | |
1119 | |
1120 template <typename CharType, class UCharPredicate> | |
1121 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace( | |
1122 UCharPredicate predicate, | |
1123 StripBehavior stripBehavior) { | |
1124 StringBuffer<CharType> data(m_length); | |
1125 | |
1126 const CharType* from = getCharacters<CharType>(); | |
1127 const CharType* fromend = from + m_length; | |
1128 int outc = 0; | |
1129 bool changedToSpace = false; | |
1130 | |
1131 CharType* to = data.characters(); | |
1132 | |
1133 if (stripBehavior == StripExtraWhiteSpace) { | |
1134 while (true) { | |
1135 while (from != fromend && predicate(*from)) { | |
1136 if (*from != ' ') | |
1137 changedToSpace = true; | |
1138 ++from; | |
1139 } | |
1140 while (from != fromend && !predicate(*from)) | |
1141 to[outc++] = *from++; | |
1142 if (from != fromend) | |
1143 to[outc++] = ' '; | |
1144 else | |
1145 break; | |
1146 } | |
1147 | |
1148 if (outc > 0 && to[outc - 1] == ' ') | |
1149 --outc; | |
1150 } else { | |
1151 for (; from != fromend; ++from) { | |
1152 if (predicate(*from)) { | |
1153 if (*from != ' ') | |
1154 changedToSpace = true; | |
1155 to[outc++] = ' '; | |
1156 } else { | |
1157 to[outc++] = *from; | |
1158 } | |
1159 } | |
1160 } | |
1161 | |
1162 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) | |
1163 return this; | |
1164 | |
1165 data.shrink(outc); | |
1166 | |
1167 return data.release(); | |
1168 } | |
1169 | |
1170 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace( | |
1171 StripBehavior stripBehavior) { | |
1172 if (is8Bit()) | |
1173 return StringImpl::simplifyMatchedCharactersToSpace<LChar>( | |
1174 SpaceOrNewlinePredicate(), stripBehavior); | |
1175 return StringImpl::simplifyMatchedCharactersToSpace<UChar>( | |
1176 SpaceOrNewlinePredicate(), stripBehavior); | |
1177 } | |
1178 | |
1179 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace( | |
1180 IsWhiteSpaceFunctionPtr isWhiteSpace, | |
1181 StripBehavior stripBehavior) { | |
1182 if (is8Bit()) | |
1183 return StringImpl::simplifyMatchedCharactersToSpace<LChar>( | |
1184 UCharPredicate(isWhiteSpace), stripBehavior); | |
1185 return StringImpl::simplifyMatchedCharactersToSpace<UChar>( | |
1186 UCharPredicate(isWhiteSpace), stripBehavior); | |
1187 } | |
1188 | |
1189 int StringImpl::toIntStrict(bool* ok, int base) { | |
1190 if (is8Bit()) | |
1191 return charactersToIntStrict(characters8(), m_length, ok, base); | |
1192 return charactersToIntStrict(characters16(), m_length, ok, base); | |
1193 } | |
1194 | |
1195 unsigned StringImpl::toUIntStrict(bool* ok, int base) { | |
1196 if (is8Bit()) | |
1197 return charactersToUIntStrict(characters8(), m_length, ok, base); | |
1198 return charactersToUIntStrict(characters16(), m_length, ok, base); | |
1199 } | |
1200 | |
1201 int64_t StringImpl::toInt64Strict(bool* ok, int base) { | |
1202 if (is8Bit()) | |
1203 return charactersToInt64Strict(characters8(), m_length, ok, base); | |
1204 return charactersToInt64Strict(characters16(), m_length, ok, base); | |
1205 } | |
1206 | |
1207 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) { | |
1208 if (is8Bit()) | |
1209 return charactersToUInt64Strict(characters8(), m_length, ok, base); | |
1210 return charactersToUInt64Strict(characters16(), m_length, ok, base); | |
1211 } | |
1212 | |
1213 int StringImpl::toInt(bool* ok) { | |
1214 if (is8Bit()) | |
1215 return charactersToInt(characters8(), m_length, ok); | |
1216 return charactersToInt(characters16(), m_length, ok); | |
1217 } | |
1218 | |
1219 unsigned StringImpl::toUInt(bool* ok) { | |
1220 if (is8Bit()) | |
1221 return charactersToUInt(characters8(), m_length, ok); | |
1222 return charactersToUInt(characters16(), m_length, ok); | |
1223 } | |
1224 | |
1225 int64_t StringImpl::toInt64(bool* ok) { | |
1226 if (is8Bit()) | |
1227 return charactersToInt64(characters8(), m_length, ok); | |
1228 return charactersToInt64(characters16(), m_length, ok); | |
1229 } | |
1230 | |
1231 uint64_t StringImpl::toUInt64(bool* ok) { | |
1232 if (is8Bit()) | |
1233 return charactersToUInt64(characters8(), m_length, ok); | |
1234 return charactersToUInt64(characters16(), m_length, ok); | |
1235 } | |
1236 | |
1237 double StringImpl::toDouble(bool* ok) { | |
1238 if (is8Bit()) | |
1239 return charactersToDouble(characters8(), m_length, ok); | |
1240 return charactersToDouble(characters16(), m_length, ok); | |
1241 } | |
1242 | |
1243 float StringImpl::toFloat(bool* ok) { | |
1244 if (is8Bit()) | |
1245 return charactersToFloat(characters8(), m_length, ok); | |
1246 return charactersToFloat(characters16(), m_length, ok); | |
1247 } | |
1248 | |
// Case-folding lookup table indexed by a Latin-1 code unit (0x00-0xff); the
// entry at index c is the folded value of c.
//  - 0x41-0x5a ('A'-'Z') map to 0x61-0x7a ('a'-'z').
//  - 0xc0-0xde map to 0xe0-0xfe, except 0xd7, which maps to itself.
//  - 0xb5 maps to 0x03bc, the only entry above 0xff, so the element type is
//    UChar rather than LChar.
//  - Every other entry, including 0xdf, maps to itself.
1249 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt | |
1250 const UChar StringImpl::latin1CaseFoldTable[256] = { | |
1251 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, | |
1252 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, | |
1253 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, | |
1254 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, | |
1255 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, | |
1256 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, | |
1257 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, | |
1258 0x003f, 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, | |
1259 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, | |
1260 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, | |
1261 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, | |
1262 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, | |
1263 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, | |
1264 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, | |
1265 0x007e, 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, | |
1266 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, | |
1267 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, | |
1268 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, | |
1269 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, | |
1270 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3, | |
1271 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, | |
1272 0x00bd, 0x00be, 0x00bf, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, | |
1273 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, | |
1274 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, | |
1275 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, 0x00e0, | |
1276 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, | |
1277 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2, | |
1278 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, | |
1279 0x00fc, 0x00fd, 0x00fe, 0x00ff, | |
1280 }; | |
1281 | |
1282 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) { | |
1283 DCHECK_GE(length, 0u); | |
1284 if (a == b) | |
1285 return true; | |
1286 while (length--) { | |
1287 if (StringImpl::latin1CaseFoldTable[*a++] != | |
1288 StringImpl::latin1CaseFoldTable[*b++]) | |
1289 return false; | |
1290 } | |
1291 return true; | |
1292 } | |
1293 | |
1294 bool equalIgnoringCase(const UChar* a, const UChar* b, unsigned length) { | |
1295 DCHECK_GE(length, 0u); | |
1296 if (a == b) | |
1297 return true; | |
1298 return !Unicode::umemcasecmp(a, b, length); | |
1299 } | |
1300 | |
1301 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) { | |
1302 while (length--) { | |
1303 if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++]) | |
1304 return false; | |
1305 } | |
1306 return true; | |
1307 } | |
1308 | |
1309 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, | |
1310 unsigned start) { | |
1311 if (is8Bit()) | |
1312 return WTF::find(characters8(), m_length, matchFunction, start); | |
1313 return WTF::find(characters16(), m_length, matchFunction, start); | |
1314 } | |
1315 | |
1316 template <typename SearchCharacterType, typename MatchCharacterType> | |
1317 ALWAYS_INLINE static size_t findInternal( | |
1318 const SearchCharacterType* searchCharacters, | |
1319 const MatchCharacterType* matchCharacters, | |
1320 unsigned index, | |
1321 unsigned searchLength, | |
1322 unsigned matchLength) { | |
1323 // Optimization: keep a running hash of the strings, | |
1324 // only call equal() if the hashes match. | |
1325 | |
1326 // delta is the number of additional times to test; delta == 0 means test only | |
1327 // once. | |
1328 unsigned delta = searchLength - matchLength; | |
1329 | |
1330 unsigned searchHash = 0; | |
1331 unsigned matchHash = 0; | |
1332 | |
1333 for (unsigned i = 0; i < matchLength; ++i) { | |
1334 searchHash += searchCharacters[i]; | |
1335 matchHash += matchCharacters[i]; | |
1336 } | |
1337 | |
1338 unsigned i = 0; | |
1339 // keep looping until we match | |
1340 while (searchHash != matchHash || | |
1341 !equal(searchCharacters + i, matchCharacters, matchLength)) { | |
1342 if (i == delta) | |
1343 return kNotFound; | |
1344 searchHash += searchCharacters[i + matchLength]; | |
1345 searchHash -= searchCharacters[i]; | |
1346 ++i; | |
1347 } | |
1348 return index + i; | |
1349 } | |
1350 | |
1351 size_t StringImpl::find(const StringView& matchString, unsigned index) { | |
1352 if (UNLIKELY(matchString.isNull())) | |
1353 return kNotFound; | |
1354 | |
1355 unsigned matchLength = matchString.length(); | |
1356 | |
1357 // Optimization 1: fast case for strings of length 1. | |
1358 if (matchLength == 1) { | |
1359 if (is8Bit()) | |
1360 return WTF::find(characters8(), length(), matchString[0], index); | |
1361 return WTF::find(characters16(), length(), matchString[0], index); | |
1362 } | |
1363 | |
1364 if (UNLIKELY(!matchLength)) | |
1365 return min(index, length()); | |
1366 | |
1367 // Check index & matchLength are in range. | |
1368 if (index > length()) | |
1369 return kNotFound; | |
1370 unsigned searchLength = length() - index; | |
1371 if (matchLength > searchLength) | |
1372 return kNotFound; | |
1373 | |
1374 if (is8Bit()) { | |
1375 if (matchString.is8Bit()) | |
1376 return findInternal(characters8() + index, matchString.characters8(), | |
1377 index, searchLength, matchLength); | |
1378 return findInternal(characters8() + index, matchString.characters16(), | |
1379 index, searchLength, matchLength); | |
1380 } | |
1381 if (matchString.is8Bit()) | |
1382 return findInternal(characters16() + index, matchString.characters8(), | |
1383 index, searchLength, matchLength); | |
1384 return findInternal(characters16() + index, matchString.characters16(), index, | |
1385 searchLength, matchLength); | |
1386 } | |
1387 | |
1388 template <typename SearchCharacterType, typename MatchCharacterType> | |
1389 ALWAYS_INLINE static size_t findIgnoringCaseInternal( | |
1390 const SearchCharacterType* searchCharacters, | |
1391 const MatchCharacterType* matchCharacters, | |
1392 unsigned index, | |
1393 unsigned searchLength, | |
1394 unsigned matchLength) { | |
1395 // delta is the number of additional times to test; delta == 0 means test only | |
1396 // once. | |
1397 unsigned delta = searchLength - matchLength; | |
1398 | |
1399 unsigned i = 0; | |
1400 // keep looping until we match | |
1401 while ( | |
1402 !equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) { | |
1403 if (i == delta) | |
1404 return kNotFound; | |
1405 ++i; | |
1406 } | |
1407 return index + i; | |
1408 } | |
1409 | |
1410 size_t StringImpl::findIgnoringCase(const StringView& matchString, | |
1411 unsigned index) { | |
1412 if (UNLIKELY(matchString.isNull())) | |
1413 return kNotFound; | |
1414 | |
1415 unsigned matchLength = matchString.length(); | |
1416 if (!matchLength) | |
1417 return min(index, length()); | |
1418 | |
1419 // Check index & matchLength are in range. | |
1420 if (index > length()) | |
1421 return kNotFound; | |
1422 unsigned searchLength = length() - index; | |
1423 if (matchLength > searchLength) | |
1424 return kNotFound; | |
1425 | |
1426 if (is8Bit()) { | |
1427 if (matchString.is8Bit()) | |
1428 return findIgnoringCaseInternal(characters8() + index, | |
1429 matchString.characters8(), index, | |
1430 searchLength, matchLength); | |
1431 return findIgnoringCaseInternal(characters8() + index, | |
1432 matchString.characters16(), index, | |
1433 searchLength, matchLength); | |
1434 } | |
1435 if (matchString.is8Bit()) | |
1436 return findIgnoringCaseInternal(characters16() + index, | |
1437 matchString.characters8(), index, | |
1438 searchLength, matchLength); | |
1439 return findIgnoringCaseInternal(characters16() + index, | |
1440 matchString.characters16(), index, | |
1441 searchLength, matchLength); | |
1442 } | |
1443 | |
1444 template <typename SearchCharacterType, typename MatchCharacterType> | |
1445 ALWAYS_INLINE static size_t findIgnoringASCIICaseInternal( | |
1446 const SearchCharacterType* searchCharacters, | |
1447 const MatchCharacterType* matchCharacters, | |
1448 unsigned index, | |
1449 unsigned searchLength, | |
1450 unsigned matchLength) { | |
1451 // delta is the number of additional times to test; delta == 0 means test only | |
1452 // once. | |
1453 unsigned delta = searchLength - matchLength; | |
1454 | |
1455 unsigned i = 0; | |
1456 // keep looping until we match | |
1457 while (!equalIgnoringASCIICase(searchCharacters + i, matchCharacters, | |
1458 matchLength)) { | |
1459 if (i == delta) | |
1460 return kNotFound; | |
1461 ++i; | |
1462 } | |
1463 return index + i; | |
1464 } | |
1465 | |
1466 size_t StringImpl::findIgnoringASCIICase(const StringView& matchString, | |
1467 unsigned index) { | |
1468 if (UNLIKELY(matchString.isNull())) | |
1469 return kNotFound; | |
1470 | |
1471 unsigned matchLength = matchString.length(); | |
1472 if (!matchLength) | |
1473 return min(index, length()); | |
1474 | |
1475 // Check index & matchLength are in range. | |
1476 if (index > length()) | |
1477 return kNotFound; | |
1478 unsigned searchLength = length() - index; | |
1479 if (matchLength > searchLength) | |
1480 return kNotFound; | |
1481 | |
1482 if (is8Bit()) { | |
1483 if (matchString.is8Bit()) | |
1484 return findIgnoringASCIICaseInternal(characters8() + index, | |
1485 matchString.characters8(), index, | |
1486 searchLength, matchLength); | |
1487 return findIgnoringASCIICaseInternal(characters8() + index, | |
1488 matchString.characters16(), index, | |
1489 searchLength, matchLength); | |
1490 } | |
1491 if (matchString.is8Bit()) | |
1492 return findIgnoringASCIICaseInternal(characters16() + index, | |
1493 matchString.characters8(), index, | |
1494 searchLength, matchLength); | |
1495 return findIgnoringASCIICaseInternal(characters16() + index, | |
1496 matchString.characters16(), index, | |
1497 searchLength, matchLength); | |
1498 } | |
1499 | |
1500 size_t StringImpl::reverseFind(UChar c, unsigned index) { | |
1501 if (is8Bit()) | |
1502 return WTF::reverseFind(characters8(), m_length, c, index); | |
1503 return WTF::reverseFind(characters16(), m_length, c, index); | |
1504 } | |
1505 | |
1506 template <typename SearchCharacterType, typename MatchCharacterType> | |
1507 ALWAYS_INLINE static size_t reverseFindInternal( | |
1508 const SearchCharacterType* searchCharacters, | |
1509 const MatchCharacterType* matchCharacters, | |
1510 unsigned index, | |
1511 unsigned length, | |
1512 unsigned matchLength) { | |
1513 // Optimization: keep a running hash of the strings, | |
1514 // only call equal if the hashes match. | |
1515 | |
1516 // delta is the number of additional times to test; delta == 0 means test only | |
1517 // once. | |
1518 unsigned delta = min(index, length - matchLength); | |
1519 | |
1520 unsigned searchHash = 0; | |
1521 unsigned matchHash = 0; | |
1522 for (unsigned i = 0; i < matchLength; ++i) { | |
1523 searchHash += searchCharacters[delta + i]; | |
1524 matchHash += matchCharacters[i]; | |
1525 } | |
1526 | |
1527 // keep looping until we match | |
1528 while (searchHash != matchHash || | |
1529 !equal(searchCharacters + delta, matchCharacters, matchLength)) { | |
1530 if (!delta) | |
1531 return kNotFound; | |
1532 --delta; | |
1533 searchHash -= searchCharacters[delta + matchLength]; | |
1534 searchHash += searchCharacters[delta]; | |
1535 } | |
1536 return delta; | |
1537 } | |
1538 | |
1539 size_t StringImpl::reverseFind(const StringView& matchString, unsigned index) { | |
1540 if (UNLIKELY(matchString.isNull())) | |
1541 return kNotFound; | |
1542 | |
1543 unsigned matchLength = matchString.length(); | |
1544 unsigned ourLength = length(); | |
1545 if (!matchLength) | |
1546 return min(index, ourLength); | |
1547 | |
1548 // Optimization 1: fast case for strings of length 1. | |
1549 if (matchLength == 1) { | |
1550 if (is8Bit()) | |
1551 return WTF::reverseFind(characters8(), ourLength, matchString[0], index); | |
1552 return WTF::reverseFind(characters16(), ourLength, matchString[0], index); | |
1553 } | |
1554 | |
1555 // Check index & matchLength are in range. | |
1556 if (matchLength > ourLength) | |
1557 return kNotFound; | |
1558 | |
1559 if (is8Bit()) { | |
1560 if (matchString.is8Bit()) | |
1561 return reverseFindInternal(characters8(), matchString.characters8(), | |
1562 index, ourLength, matchLength); | |
1563 return reverseFindInternal(characters8(), matchString.characters16(), index, | |
1564 ourLength, matchLength); | |
1565 } | |
1566 if (matchString.is8Bit()) | |
1567 return reverseFindInternal(characters16(), matchString.characters8(), index, | |
1568 ourLength, matchLength); | |
1569 return reverseFindInternal(characters16(), matchString.characters16(), index, | |
1570 ourLength, matchLength); | |
1571 } | |
1572 | |
1573 bool StringImpl::startsWith(UChar character) const { | |
1574 return m_length && (*this)[0] == character; | |
1575 } | |
1576 | |
1577 bool StringImpl::startsWith(const StringView& prefix) const { | |
1578 if (prefix.length() > length()) | |
1579 return false; | |
1580 if (is8Bit()) { | |
1581 if (prefix.is8Bit()) | |
1582 return equal(characters8(), prefix.characters8(), prefix.length()); | |
1583 return equal(characters8(), prefix.characters16(), prefix.length()); | |
1584 } | |
1585 if (prefix.is8Bit()) | |
1586 return equal(characters16(), prefix.characters8(), prefix.length()); | |
1587 return equal(characters16(), prefix.characters16(), prefix.length()); | |
1588 } | |
1589 | |
1590 bool StringImpl::startsWithIgnoringCase(const StringView& prefix) const { | |
1591 if (prefix.length() > length()) | |
1592 return false; | |
1593 if (is8Bit()) { | |
1594 if (prefix.is8Bit()) | |
1595 return equalIgnoringCase(characters8(), prefix.characters8(), | |
1596 prefix.length()); | |
1597 return equalIgnoringCase(characters8(), prefix.characters16(), | |
1598 prefix.length()); | |
1599 } | |
1600 if (prefix.is8Bit()) | |
1601 return equalIgnoringCase(characters16(), prefix.characters8(), | |
1602 prefix.length()); | |
1603 return equalIgnoringCase(characters16(), prefix.characters16(), | |
1604 prefix.length()); | |
1605 } | |
1606 | |
1607 bool StringImpl::startsWithIgnoringASCIICase(const StringView& prefix) const { | |
1608 if (prefix.length() > length()) | |
1609 return false; | |
1610 if (is8Bit()) { | |
1611 if (prefix.is8Bit()) | |
1612 return equalIgnoringASCIICase(characters8(), prefix.characters8(), | |
1613 prefix.length()); | |
1614 return equalIgnoringASCIICase(characters8(), prefix.characters16(), | |
1615 prefix.length()); | |
1616 } | |
1617 if (prefix.is8Bit()) | |
1618 return equalIgnoringASCIICase(characters16(), prefix.characters8(), | |
1619 prefix.length()); | |
1620 return equalIgnoringASCIICase(characters16(), prefix.characters16(), | |
1621 prefix.length()); | |
1622 } | |
1623 | |
1624 bool StringImpl::endsWith(UChar character) const { | |
1625 return m_length && (*this)[m_length - 1] == character; | |
1626 } | |
1627 | |
1628 bool StringImpl::endsWith(const StringView& suffix) const { | |
1629 if (suffix.length() > length()) | |
1630 return false; | |
1631 unsigned startOffset = length() - suffix.length(); | |
1632 if (is8Bit()) { | |
1633 if (suffix.is8Bit()) | |
1634 return equal(characters8() + startOffset, suffix.characters8(), | |
1635 suffix.length()); | |
1636 return equal(characters8() + startOffset, suffix.characters16(), | |
1637 suffix.length()); | |
1638 } | |
1639 if (suffix.is8Bit()) | |
1640 return equal(characters16() + startOffset, suffix.characters8(), | |
1641 suffix.length()); | |
1642 return equal(characters16() + startOffset, suffix.characters16(), | |
1643 suffix.length()); | |
1644 } | |
1645 | |
1646 bool StringImpl::endsWithIgnoringCase(const StringView& suffix) const { | |
1647 if (suffix.length() > length()) | |
1648 return false; | |
1649 unsigned startOffset = length() - suffix.length(); | |
1650 if (is8Bit()) { | |
1651 if (suffix.is8Bit()) | |
1652 return equalIgnoringCase(characters8() + startOffset, | |
1653 suffix.characters8(), suffix.length()); | |
1654 return equalIgnoringCase(characters8() + startOffset, suffix.characters16(), | |
1655 suffix.length()); | |
1656 } | |
1657 if (suffix.is8Bit()) | |
1658 return equalIgnoringCase(characters16() + startOffset, suffix.characters8(), | |
1659 suffix.length()); | |
1660 return equalIgnoringCase(characters16() + startOffset, suffix.characters16(), | |
1661 suffix.length()); | |
1662 } | |
1663 | |
1664 bool StringImpl::endsWithIgnoringASCIICase(const StringView& suffix) const { | |
1665 if (suffix.length() > length()) | |
1666 return false; | |
1667 unsigned startOffset = length() - suffix.length(); | |
1668 if (is8Bit()) { | |
1669 if (suffix.is8Bit()) | |
1670 return equalIgnoringASCIICase(characters8() + startOffset, | |
1671 suffix.characters8(), suffix.length()); | |
1672 return equalIgnoringASCIICase(characters8() + startOffset, | |
1673 suffix.characters16(), suffix.length()); | |
1674 } | |
1675 if (suffix.is8Bit()) | |
1676 return equalIgnoringASCIICase(characters16() + startOffset, | |
1677 suffix.characters8(), suffix.length()); | |
1678 return equalIgnoringASCIICase(characters16() + startOffset, | |
1679 suffix.characters16(), suffix.length()); | |
1680 } | |
1681 | |
1682 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) { | |
1683 if (oldC == newC) | |
1684 return this; | |
1685 | |
1686 if (find(oldC) == kNotFound) | |
1687 return this; | |
1688 | |
1689 unsigned i; | |
1690 if (is8Bit()) { | |
1691 if (newC <= 0xff) { | |
1692 LChar* data; | |
1693 LChar oldChar = static_cast<LChar>(oldC); | |
1694 LChar newChar = static_cast<LChar>(newC); | |
1695 | |
1696 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
1697 | |
1698 for (i = 0; i != m_length; ++i) { | |
1699 LChar ch = characters8()[i]; | |
1700 if (ch == oldChar) | |
1701 ch = newChar; | |
1702 data[i] = ch; | |
1703 } | |
1704 return newImpl.release(); | |
1705 } | |
1706 | |
1707 // There is the possibility we need to up convert from 8 to 16 bit, | |
1708 // create a 16 bit string for the result. | |
1709 UChar* data; | |
1710 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
1711 | |
1712 for (i = 0; i != m_length; ++i) { | |
1713 UChar ch = characters8()[i]; | |
1714 if (ch == oldC) | |
1715 ch = newC; | |
1716 data[i] = ch; | |
1717 } | |
1718 | |
1719 return newImpl.release(); | |
1720 } | |
1721 | |
1722 UChar* data; | |
1723 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
1724 | |
1725 for (i = 0; i != m_length; ++i) { | |
1726 UChar ch = characters16()[i]; | |
1727 if (ch == oldC) | |
1728 ch = newC; | |
1729 data[i] = ch; | |
1730 } | |
1731 return newImpl.release(); | |
1732 } | |
1733 | |
1734 // TODO(esprehn): Passing a null replacement is the same as empty string for | |
1735 // this method but all others treat null as a no-op. We should choose one | |
1736 // behavior. | |
1737 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, | |
1738 unsigned lengthToReplace, | |
1739 const StringView& string) { | |
1740 position = min(position, length()); | |
1741 lengthToReplace = min(lengthToReplace, length() - position); | |
1742 unsigned lengthToInsert = string.length(); | |
1743 if (!lengthToReplace && !lengthToInsert) | |
1744 return this; | |
1745 | |
1746 RELEASE_ASSERT((length() - lengthToReplace) < | |
1747 (numeric_limits<unsigned>::max() - lengthToInsert)); | |
1748 | |
1749 if (is8Bit() && (string.isNull() || string.is8Bit())) { | |
1750 LChar* data; | |
1751 RefPtr<StringImpl> newImpl = | |
1752 createUninitialized(length() - lengthToReplace + lengthToInsert, data); | |
1753 memcpy(data, characters8(), position * sizeof(LChar)); | |
1754 if (!string.isNull()) | |
1755 memcpy(data + position, string.characters8(), | |
1756 lengthToInsert * sizeof(LChar)); | |
1757 memcpy(data + position + lengthToInsert, | |
1758 characters8() + position + lengthToReplace, | |
1759 (length() - position - lengthToReplace) * sizeof(LChar)); | |
1760 return newImpl.release(); | |
1761 } | |
1762 UChar* data; | |
1763 RefPtr<StringImpl> newImpl = | |
1764 createUninitialized(length() - lengthToReplace + lengthToInsert, data); | |
1765 if (is8Bit()) | |
1766 for (unsigned i = 0; i < position; ++i) | |
1767 data[i] = characters8()[i]; | |
1768 else | |
1769 memcpy(data, characters16(), position * sizeof(UChar)); | |
1770 if (!string.isNull()) { | |
1771 if (string.is8Bit()) | |
1772 for (unsigned i = 0; i < lengthToInsert; ++i) | |
1773 data[i + position] = string.characters8()[i]; | |
1774 else | |
1775 memcpy(data + position, string.characters16(), | |
1776 lengthToInsert * sizeof(UChar)); | |
1777 } | |
1778 if (is8Bit()) { | |
1779 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i) | |
1780 data[i + position + lengthToInsert] = | |
1781 characters8()[i + position + lengthToReplace]; | |
1782 } else { | |
1783 memcpy(data + position + lengthToInsert, | |
1784 characters16() + position + lengthToReplace, | |
1785 (length() - position - lengthToReplace) * sizeof(UChar)); | |
1786 } | |
1787 return newImpl.release(); | |
1788 } | |
1789 | |
1790 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, | |
1791 const StringView& replacement) { | |
1792 if (replacement.isNull()) | |
1793 return this; | |
1794 if (replacement.is8Bit()) | |
1795 return replace(pattern, replacement.characters8(), replacement.length()); | |
1796 return replace(pattern, replacement.characters16(), replacement.length()); | |
1797 } | |
1798 | |
1799 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, | |
1800 const LChar* replacement, | |
1801 unsigned repStrLength) { | |
1802 DCHECK(replacement); | |
1803 | |
1804 size_t srcSegmentStart = 0; | |
1805 unsigned matchCount = 0; | |
1806 | |
1807 // Count the matches. | |
1808 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { | |
1809 ++matchCount; | |
1810 ++srcSegmentStart; | |
1811 } | |
1812 | |
1813 // If we have 0 matches then we don't have to do any more work. | |
1814 if (!matchCount) | |
1815 return this; | |
1816 | |
1817 RELEASE_ASSERT(!repStrLength || | |
1818 matchCount <= numeric_limits<unsigned>::max() / repStrLength); | |
1819 | |
1820 unsigned replaceSize = matchCount * repStrLength; | |
1821 unsigned newSize = m_length - matchCount; | |
1822 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); | |
1823 | |
1824 newSize += replaceSize; | |
1825 | |
1826 // Construct the new data. | |
1827 size_t srcSegmentEnd; | |
1828 unsigned srcSegmentLength; | |
1829 srcSegmentStart = 0; | |
1830 unsigned dstOffset = 0; | |
1831 | |
1832 if (is8Bit()) { | |
1833 LChar* data; | |
1834 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
1835 | |
1836 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
1837 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
1838 memcpy(data + dstOffset, characters8() + srcSegmentStart, | |
1839 srcSegmentLength * sizeof(LChar)); | |
1840 dstOffset += srcSegmentLength; | |
1841 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar)); | |
1842 dstOffset += repStrLength; | |
1843 srcSegmentStart = srcSegmentEnd + 1; | |
1844 } | |
1845 | |
1846 srcSegmentLength = m_length - srcSegmentStart; | |
1847 memcpy(data + dstOffset, characters8() + srcSegmentStart, | |
1848 srcSegmentLength * sizeof(LChar)); | |
1849 | |
1850 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length()); | |
1851 | |
1852 return newImpl.release(); | |
1853 } | |
1854 | |
1855 UChar* data; | |
1856 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
1857 | |
1858 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
1859 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
1860 memcpy(data + dstOffset, characters16() + srcSegmentStart, | |
1861 srcSegmentLength * sizeof(UChar)); | |
1862 | |
1863 dstOffset += srcSegmentLength; | |
1864 for (unsigned i = 0; i < repStrLength; ++i) | |
1865 data[i + dstOffset] = replacement[i]; | |
1866 | |
1867 dstOffset += repStrLength; | |
1868 srcSegmentStart = srcSegmentEnd + 1; | |
1869 } | |
1870 | |
1871 srcSegmentLength = m_length - srcSegmentStart; | |
1872 memcpy(data + dstOffset, characters16() + srcSegmentStart, | |
1873 srcSegmentLength * sizeof(UChar)); | |
1874 | |
1875 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length()); | |
1876 | |
1877 return newImpl.release(); | |
1878 } | |
1879 | |
1880 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, | |
1881 const UChar* replacement, | |
1882 unsigned repStrLength) { | |
1883 DCHECK(replacement); | |
1884 | |
1885 size_t srcSegmentStart = 0; | |
1886 unsigned matchCount = 0; | |
1887 | |
1888 // Count the matches. | |
1889 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { | |
1890 ++matchCount; | |
1891 ++srcSegmentStart; | |
1892 } | |
1893 | |
1894 // If we have 0 matches then we don't have to do any more work. | |
1895 if (!matchCount) | |
1896 return this; | |
1897 | |
1898 RELEASE_ASSERT(!repStrLength || | |
1899 matchCount <= numeric_limits<unsigned>::max() / repStrLength); | |
1900 | |
1901 unsigned replaceSize = matchCount * repStrLength; | |
1902 unsigned newSize = m_length - matchCount; | |
1903 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); | |
1904 | |
1905 newSize += replaceSize; | |
1906 | |
1907 // Construct the new data. | |
1908 size_t srcSegmentEnd; | |
1909 unsigned srcSegmentLength; | |
1910 srcSegmentStart = 0; | |
1911 unsigned dstOffset = 0; | |
1912 | |
1913 if (is8Bit()) { | |
1914 UChar* data; | |
1915 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
1916 | |
1917 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
1918 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
1919 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
1920 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | |
1921 | |
1922 dstOffset += srcSegmentLength; | |
1923 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); | |
1924 | |
1925 dstOffset += repStrLength; | |
1926 srcSegmentStart = srcSegmentEnd + 1; | |
1927 } | |
1928 | |
1929 srcSegmentLength = m_length - srcSegmentStart; | |
1930 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
1931 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | |
1932 | |
1933 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length()); | |
1934 | |
1935 return newImpl.release(); | |
1936 } | |
1937 | |
1938 UChar* data; | |
1939 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
1940 | |
1941 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
1942 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
1943 memcpy(data + dstOffset, characters16() + srcSegmentStart, | |
1944 srcSegmentLength * sizeof(UChar)); | |
1945 | |
1946 dstOffset += srcSegmentLength; | |
1947 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); | |
1948 | |
1949 dstOffset += repStrLength; | |
1950 srcSegmentStart = srcSegmentEnd + 1; | |
1951 } | |
1952 | |
1953 srcSegmentLength = m_length - srcSegmentStart; | |
1954 memcpy(data + dstOffset, characters16() + srcSegmentStart, | |
1955 srcSegmentLength * sizeof(UChar)); | |
1956 | |
1957 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length()); | |
1958 | |
1959 return newImpl.release(); | |
1960 } | |
1961 | |
1962 PassRefPtr<StringImpl> StringImpl::replace(const StringView& pattern, | |
1963 const StringView& replacement) { | |
1964 if (pattern.isNull() || replacement.isNull()) | |
1965 return this; | |
1966 | |
1967 unsigned patternLength = pattern.length(); | |
1968 if (!patternLength) | |
1969 return this; | |
1970 | |
1971 unsigned repStrLength = replacement.length(); | |
1972 size_t srcSegmentStart = 0; | |
1973 unsigned matchCount = 0; | |
1974 | |
1975 // Count the matches. | |
1976 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { | |
1977 ++matchCount; | |
1978 srcSegmentStart += patternLength; | |
1979 } | |
1980 | |
1981 // If we have 0 matches, we don't have to do any more work | |
1982 if (!matchCount) | |
1983 return this; | |
1984 | |
1985 unsigned newSize = m_length - matchCount * patternLength; | |
1986 RELEASE_ASSERT(!repStrLength || | |
1987 matchCount <= numeric_limits<unsigned>::max() / repStrLength); | |
1988 | |
1989 RELEASE_ASSERT(newSize <= | |
1990 (numeric_limits<unsigned>::max() - matchCount * repStrLength)); | |
1991 | |
1992 newSize += matchCount * repStrLength; | |
1993 | |
1994 // Construct the new data | |
1995 size_t srcSegmentEnd; | |
1996 unsigned srcSegmentLength; | |
1997 srcSegmentStart = 0; | |
1998 unsigned dstOffset = 0; | |
1999 bool srcIs8Bit = is8Bit(); | |
2000 bool replacementIs8Bit = replacement.is8Bit(); | |
2001 | |
2002 // There are 4 cases: | |
2003 // 1. This and replacement are both 8 bit. | |
2004 // 2. This and replacement are both 16 bit. | |
2005 // 3. This is 8 bit and replacement is 16 bit. | |
2006 // 4. This is 16 bit and replacement is 8 bit. | |
2007 if (srcIs8Bit && replacementIs8Bit) { | |
2008 // Case 1 | |
2009 LChar* data; | |
2010 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
2011 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
2012 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
2013 memcpy(data + dstOffset, characters8() + srcSegmentStart, | |
2014 srcSegmentLength * sizeof(LChar)); | |
2015 dstOffset += srcSegmentLength; | |
2016 memcpy(data + dstOffset, replacement.characters8(), | |
2017 repStrLength * sizeof(LChar)); | |
2018 dstOffset += repStrLength; | |
2019 srcSegmentStart = srcSegmentEnd + patternLength; | |
2020 } | |
2021 | |
2022 srcSegmentLength = m_length - srcSegmentStart; | |
2023 memcpy(data + dstOffset, characters8() + srcSegmentStart, | |
2024 srcSegmentLength * sizeof(LChar)); | |
2025 | |
2026 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length()); | |
2027 | |
2028 return newImpl.release(); | |
2029 } | |
2030 | |
2031 UChar* data; | |
2032 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
2033 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
2034 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
2035 if (srcIs8Bit) { | |
2036 // Case 3. | |
2037 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
2038 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | |
2039 } else { | |
2040 // Case 2 & 4. | |
2041 memcpy(data + dstOffset, characters16() + srcSegmentStart, | |
2042 srcSegmentLength * sizeof(UChar)); | |
2043 } | |
2044 dstOffset += srcSegmentLength; | |
2045 if (replacementIs8Bit) { | |
2046 // Cases 2 & 3. | |
2047 for (unsigned i = 0; i < repStrLength; ++i) | |
2048 data[i + dstOffset] = replacement.characters8()[i]; | |
2049 } else { | |
2050 // Case 4 | |
2051 memcpy(data + dstOffset, replacement.characters16(), | |
2052 repStrLength * sizeof(UChar)); | |
2053 } | |
2054 dstOffset += repStrLength; | |
2055 srcSegmentStart = srcSegmentEnd + patternLength; | |
2056 } | |
2057 | |
2058 srcSegmentLength = m_length - srcSegmentStart; | |
2059 if (srcIs8Bit) { | |
2060 // Case 3. | |
2061 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
2062 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | |
2063 } else { | |
2064 // Cases 2 & 4. | |
2065 memcpy(data + dstOffset, characters16() + srcSegmentStart, | |
2066 srcSegmentLength * sizeof(UChar)); | |
2067 } | |
2068 | |
2069 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length()); | |
2070 | |
2071 return newImpl.release(); | |
2072 } | |
2073 | |
2074 PassRefPtr<StringImpl> StringImpl::upconvertedString() { | |
2075 if (is8Bit()) | |
2076 return String::make16BitFrom8BitSource(characters8(), m_length) | |
2077 .releaseImpl(); | |
2078 return this; | |
2079 } | |
2080 | |
2081 static inline bool stringImplContentEqual(const StringImpl* a, | |
2082 const StringImpl* b) { | |
2083 unsigned aLength = a->length(); | |
2084 unsigned bLength = b->length(); | |
2085 if (aLength != bLength) | |
2086 return false; | |
2087 | |
2088 if (a->is8Bit()) { | |
2089 if (b->is8Bit()) | |
2090 return equal(a->characters8(), b->characters8(), aLength); | |
2091 | |
2092 return equal(a->characters8(), b->characters16(), aLength); | |
2093 } | |
2094 | |
2095 if (b->is8Bit()) | |
2096 return equal(a->characters16(), b->characters8(), aLength); | |
2097 | |
2098 return equal(a->characters16(), b->characters16(), aLength); | |
2099 } | |
2100 | |
2101 bool equal(const StringImpl* a, const StringImpl* b) { | |
2102 if (a == b) | |
2103 return true; | |
2104 if (!a || !b) | |
2105 return false; | |
2106 if (a->isAtomic() && b->isAtomic()) | |
2107 return false; | |
2108 | |
2109 return stringImplContentEqual(a, b); | |
2110 } | |
2111 | |
2112 template <typename CharType> | |
2113 inline bool equalInternal(const StringImpl* a, | |
2114 const CharType* b, | |
2115 unsigned length) { | |
2116 if (!a) | |
2117 return !b; | |
2118 if (!b) | |
2119 return false; | |
2120 | |
2121 if (a->length() != length) | |
2122 return false; | |
2123 if (a->is8Bit()) | |
2124 return equal(a->characters8(), b, length); | |
2125 return equal(a->characters16(), b, length); | |
2126 } | |
2127 | |
2128 bool equal(const StringImpl* a, const LChar* b, unsigned length) { | |
2129 return equalInternal(a, b, length); | |
2130 } | |
2131 | |
2132 bool equal(const StringImpl* a, const UChar* b, unsigned length) { | |
2133 return equalInternal(a, b, length); | |
2134 } | |
2135 | |
2136 bool equal(const StringImpl* a, const LChar* b) { | |
2137 if (!a) | |
2138 return !b; | |
2139 if (!b) | |
2140 return !a; | |
2141 | |
2142 unsigned length = a->length(); | |
2143 | |
2144 if (a->is8Bit()) { | |
2145 const LChar* aPtr = a->characters8(); | |
2146 for (unsigned i = 0; i != length; ++i) { | |
2147 LChar bc = b[i]; | |
2148 LChar ac = aPtr[i]; | |
2149 if (!bc) | |
2150 return false; | |
2151 if (ac != bc) | |
2152 return false; | |
2153 } | |
2154 | |
2155 return !b[length]; | |
2156 } | |
2157 | |
2158 const UChar* aPtr = a->characters16(); | |
2159 for (unsigned i = 0; i != length; ++i) { | |
2160 LChar bc = b[i]; | |
2161 if (!bc) | |
2162 return false; | |
2163 if (aPtr[i] != bc) | |
2164 return false; | |
2165 } | |
2166 | |
2167 return !b[length]; | |
2168 } | |
2169 | |
2170 bool equalNonNull(const StringImpl* a, const StringImpl* b) { | |
2171 DCHECK(a); | |
2172 DCHECK(b); | |
2173 if (a == b) | |
2174 return true; | |
2175 | |
2176 return stringImplContentEqual(a, b); | |
2177 } | |
2178 | |
2179 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) { | |
2180 if (!a && b && !b->length()) | |
2181 return true; | |
2182 if (!b && a && !a->length()) | |
2183 return true; | |
2184 return equal(a, b); | |
2185 } | |
2186 | |
// Three-way comparison of two character sequences after ASCII lower-casing
// each character with toASCIILower(). |l1| and |l2| are the lengths of |c1|
// and |c2|. Returns -1, 0 or 1; when one sequence is a prefix of the other,
// the shorter one orders first.
template <typename CharacterType1, typename CharacterType2>
int codePointCompareIgnoringASCIICase(unsigned l1,
                                      unsigned l2,
                                      const CharacterType1* c1,
                                      const CharacterType2* c2) {
  const unsigned commonLength = l2 < l1 ? l2 : l1;

  // The first differing position within the common prefix decides the order.
  for (unsigned index = 0; index < commonLength; ++index) {
    const auto lower1 = toASCIILower(c1[index]);
    const auto lower2 = toASCIILower(c2[index]);
    if (lower1 != lower2)
      return (lower1 > lower2) ? 1 : -1;
  }

  // The common prefix is identical, so the lengths decide.
  if (l1 == l2)
    return 0;
  return (l1 > l2) ? 1 : -1;
}
2208 | |
2209 int codePointCompareIgnoringASCIICase(const StringImpl* string1, | |
2210 const LChar* string2) { | |
2211 unsigned length1 = string1 ? string1->length() : 0; | |
2212 size_t length2 = string2 ? strlen(reinterpret_cast<const char*>(string2)) : 0; | |
2213 | |
2214 if (!string1) | |
2215 return length2 > 0 ? -1 : 0; | |
2216 | |
2217 if (!string2) | |
2218 return length1 > 0 ? 1 : 0; | |
2219 | |
2220 if (string1->is8Bit()) | |
2221 return codePointCompareIgnoringASCIICase(length1, length2, | |
2222 string1->characters8(), string2); | |
2223 return codePointCompareIgnoringASCIICase(length1, length2, | |
2224 string1->characters16(), string2); | |
2225 } | |
2226 | |
2227 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) { | |
2228 if (!localeIdentifier.isNull()) { | |
2229 if (localeIdMatchesLang(localeIdentifier, "tr") || | |
2230 localeIdMatchesLang(localeIdentifier, "az")) { | |
2231 if (c == 'i') | |
2232 return latinCapitalLetterIWithDotAbove; | |
2233 if (c == latinSmallLetterDotlessI) | |
2234 return 'I'; | |
2235 } else if (localeIdMatchesLang(localeIdentifier, "lt")) { | |
2236 // TODO(rob.buis) implement upper-casing rules for lt | |
2237 // like in StringImpl::upper(locale). | |
2238 } | |
2239 } | |
2240 | |
2241 return toUpper(c); | |
2242 } | |
2243 | |
2244 } // namespace WTF | |
OLD | NEW |