Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(26)

Side by Side Diff: third_party/WebKit/Source/wtf/text/StringImpl.cpp

Issue 2764243002: Move files in wtf/ to platform/wtf/ (Part 9). (Closed)
Patch Set: Rebase. Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All
6 * rights reserved.
7 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Library General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Library General Public License for more details.
18 *
19 * You should have received a copy of the GNU Library General Public License
20 * along with this library; see the file COPYING.LIB. If not, write to
21 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 * Boston, MA 02110-1301, USA.
23 *
24 */
25
26 #include "wtf/text/StringImpl.h"
27
28 #include "wtf/DynamicAnnotations.h"
29 #include "wtf/LeakAnnotations.h"
30 #include "wtf/PtrUtil.h"
31 #include "wtf/StaticConstructors.h"
32 #include "wtf/StdLibExtras.h"
33 #include "wtf/allocator/Partitions.h"
34 #include "wtf/text/AtomicString.h"
35 #include "wtf/text/AtomicStringTable.h"
36 #include "wtf/text/CString.h"
37 #include "wtf/text/CharacterNames.h"
38 #include "wtf/text/StringBuffer.h"
39 #include "wtf/text/StringHash.h"
40 #include "wtf/text/StringToNumber.h"
41 #include <algorithm>
42 #include <memory>
43
44 #ifdef STRING_STATS
45 #include "wtf/DataLog.h"
46 #include "wtf/HashMap.h"
47 #include "wtf/HashSet.h"
48 #include "wtf/RefCounted.h"
49 #include "wtf/ThreadingPrimitives.h"
50 #include <unistd.h>
51 #endif
52
53 using namespace std;
54
55 namespace WTF {
56
57 using namespace Unicode;
58
59 // As of Jan 2017, StringImpl needs 2 * sizeof(int) + 29 bits of data, and
60 // sizeof(ThreadRestrictionVerifier) is 16 bytes. Thus, in DCHECK mode the
61 // class may be padded to 32 bytes.
62 #if DCHECK_IS_ON()
63 static_assert(sizeof(StringImpl) <= 8 * sizeof(int),
64 "StringImpl should stay small");
65 #else
66 static_assert(sizeof(StringImpl) <= 3 * sizeof(int),
67 "StringImpl should stay small");
68 #endif
69
70 #ifdef STRING_STATS
71
72 static Mutex& statsMutex() {
73 DEFINE_STATIC_LOCAL(Mutex, mutex, ());
74 return mutex;
75 }
76
77 static HashSet<void*>& liveStrings() {
78 // Notice that we can't use HashSet<StringImpl*> because then HashSet would
79 // dedup identical strings.
80 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
81 return strings;
82 }
83
84 void addStringForStats(StringImpl* string) {
85 MutexLocker locker(statsMutex());
86 liveStrings().add(string);
87 }
88
89 void removeStringForStats(StringImpl* string) {
90 MutexLocker locker(statsMutex());
91 liveStrings().remove(string);
92 }
93
94 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) {
95 const unsigned kMaxSnippetLength = 64;
96 snippet.clear();
97
98 size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
99 if (expectedLength == kMaxSnippetLength)
100 expectedLength += 3; // For the "...".
101 ++expectedLength; // For the terminating '\0'.
102 snippet.reserveCapacity(expectedLength);
103
104 size_t i;
105 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
106 UChar c = (*string)[i];
107 if (isASCIIPrintable(c))
108 snippet.append(c);
109 else
110 snippet.append('?');
111 }
112 if (i < string->length()) {
113 snippet.append('.');
114 snippet.append('.');
115 snippet.append('.');
116 }
117 snippet.append('\0');
118 }
119
120 static bool isUnnecessarilyWide(const StringImpl* string) {
121 if (string->is8Bit())
122 return false;
123 UChar c = 0;
124 for (unsigned i = 0; i < string->length(); ++i)
125 c |= (*string)[i] >> 8;
126 return !c;
127 }
128
129 class PerStringStats : public RefCounted<PerStringStats> {
130 public:
131 static PassRefPtr<PerStringStats> create() {
132 return adoptRef(new PerStringStats);
133 }
134
135 void add(const StringImpl* string) {
136 ++m_numberOfCopies;
137 if (!m_length) {
138 m_length = string->length();
139 fillWithSnippet(string, m_snippet);
140 }
141 if (string->isAtomic())
142 ++m_numberOfAtomicCopies;
143 if (isUnnecessarilyWide(string))
144 m_unnecessarilyWide = true;
145 }
146
147 size_t totalCharacters() const { return m_numberOfCopies * m_length; }
148
149 void print() {
150 const char* status = "ok";
151 if (m_unnecessarilyWide)
152 status = "16";
153 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status,
154 m_length, m_snippet.data());
155 }
156
157 bool m_unnecessarilyWide;
158 unsigned m_numberOfCopies;
159 unsigned m_length;
160 unsigned m_numberOfAtomicCopies;
161 Vector<char> m_snippet;
162
163 private:
164 PerStringStats()
165 : m_unnecessarilyWide(false),
166 m_numberOfCopies(0),
167 m_length(0),
168 m_numberOfAtomicCopies(0) {}
169 };
170
171 bool operator<(const RefPtr<PerStringStats>& a,
172 const RefPtr<PerStringStats>& b) {
173 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
174 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
175 if (a->totalCharacters() != b->totalCharacters())
176 return a->totalCharacters() < b->totalCharacters();
177 if (a->m_numberOfCopies != b->m_numberOfCopies)
178 return a->m_numberOfCopies < b->m_numberOfCopies;
179 if (a->m_length != b->m_length)
180 return a->m_length < b->m_length;
181 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
182 }
183
184 static void printLiveStringStats(void*) {
185 MutexLocker locker(statsMutex());
186 HashSet<void*>& strings = liveStrings();
187
188 HashMap<StringImpl*, RefPtr<PerStringStats>> stats;
189 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end();
190 ++iter) {
191 StringImpl* string = static_cast<StringImpl*>(*iter);
192 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry =
193 stats.find(string);
194 RefPtr<PerStringStats> value =
195 entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create())
196 : entry->value;
197 value->add(string);
198 stats.set(string, value.release());
199 }
200
201 Vector<RefPtr<PerStringStats>> all;
202 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter =
203 stats.begin();
204 iter != stats.end(); ++iter)
205 all.append(iter->value);
206
207 std::sort(all.begin(), all.end());
208 std::reverse(all.begin(), all.end());
209 for (size_t i = 0; i < 20 && i < all.size(); ++i)
210 all[i]->print();
211 }
212
213 StringStats StringImpl::m_stringStats;
214
215 unsigned StringStats::s_stringRemovesTillPrintStats =
216 StringStats::s_printStringStatsFrequency;
217
218 void StringStats::removeString(StringImpl* string) {
219 unsigned length = string->length();
220 --m_totalNumberStrings;
221
222 if (string->is8Bit()) {
223 --m_number8BitStrings;
224 m_total8BitData -= length;
225 } else {
226 --m_number16BitStrings;
227 m_total16BitData -= length;
228 }
229
230 if (!--s_stringRemovesTillPrintStats) {
231 s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
232 printStats();
233 }
234 }
235
236 void StringStats::printStats() {
237 dataLogF("String stats for process id %d:\n", getpid());
238
239 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
240 double percent8Bit =
241 m_totalNumberStrings
242 ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings
243 : 0.0;
244 double average8bitLength =
245 m_number8BitStrings
246 ? (double)m_total8BitData / (double)m_number8BitStrings
247 : 0.0;
248 dataLogF(
249 "%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length "
250 "%6.1f\n",
251 m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData,
252 average8bitLength);
253
254 double percent16Bit =
255 m_totalNumberStrings
256 ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings
257 : 0.0;
258 double average16bitLength =
259 m_number16BitStrings
260 ? (double)m_total16BitData / (double)m_number16BitStrings
261 : 0.0;
262 dataLogF(
263 "%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length "
264 "%6.1f\n",
265 m_number16BitStrings, percent16Bit, m_total16BitData,
266 m_total16BitData * 2, average16bitLength);
267
268 double averageLength =
269 m_totalNumberStrings
270 ? (double)totalNumberCharacters / (double)m_totalNumberStrings
271 : 0.0;
272 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
273 dataLogF(
274 "%8u Total %12llu chars %12llu bytes avg length "
275 "%6.1f\n",
276 m_totalNumberStrings, totalNumberCharacters, totalDataBytes,
277 averageLength);
278 unsigned long long totalSavedBytes = m_total8BitData;
279 double percentSavings = totalSavedBytes
280 ? ((double)totalSavedBytes * 100) /
281 (double)(totalDataBytes + totalSavedBytes)
282 : 0.0;
283 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes,
284 percentSavings);
285
286 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
287 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
288 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead,
289 overheadPercent);
290
291 internal::callOnMainThread(&printLiveStringStats, nullptr);
292 }
293 #endif
294
295 void* StringImpl::operator new(size_t size) {
296 DCHECK_EQ(size, sizeof(StringImpl));
297 return Partitions::bufferMalloc(size, "WTF::StringImpl");
298 }
299
300 void StringImpl::operator delete(void* ptr) {
301 Partitions::bufferFree(ptr);
302 }
303
304 inline StringImpl::~StringImpl() {
305 DCHECK(!isStatic());
306
307 STRING_STATS_REMOVE_STRING(this);
308
309 if (isAtomic())
310 AtomicStringTable::instance().remove(this);
311 }
312
313 void StringImpl::destroyIfNotStatic() const {
314 if (!isStatic())
315 delete this;
316 }
317
318 void StringImpl::updateContainsOnlyASCII() const {
319 m_containsOnlyASCII = is8Bit()
320 ? charactersAreAllASCII(characters8(), length())
321 : charactersAreAllASCII(characters16(), length());
322 m_needsASCIICheck = false;
323 }
324
325 bool StringImpl::isSafeToSendToAnotherThread() const {
326 if (isStatic())
327 return true;
328 // AtomicStrings are not safe to send between threads as ~StringImpl()
329 // will try to remove them from the wrong AtomicStringTable.
330 if (isAtomic())
331 return false;
332 if (hasOneRef())
333 return true;
334 return false;
335 }
336
337 #if DCHECK_IS_ON()
338 std::string StringImpl::asciiForDebugging() const {
339 CString ascii = String(isolatedCopy()->substring(0, 128)).ascii();
340 return std::string(ascii.data(), ascii.length());
341 }
342 #endif
343
344 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length,
345 LChar*& data) {
346 if (!length) {
347 data = 0;
348 return empty;
349 }
350
351 // Allocate a single buffer large enough to contain the StringImpl
352 // struct as well as the data which it contains. This removes one
353 // heap allocation from this call.
354 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(
355 allocationSize<LChar>(length), "WTF::StringImpl"));
356
357 data = reinterpret_cast<LChar*>(string + 1);
358 return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
359 }
360
361 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length,
362 UChar*& data) {
363 if (!length) {
364 data = 0;
365 return empty;
366 }
367
368 // Allocate a single buffer large enough to contain the StringImpl
369 // struct as well as the data which it contains. This removes one
370 // heap allocation from this call.
371 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(
372 allocationSize<UChar>(length), "WTF::StringImpl"));
373
374 data = reinterpret_cast<UChar*>(string + 1);
375 return adoptRef(new (string) StringImpl(length));
376 }
377
378 static StaticStringsTable& staticStrings() {
379 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
380 return staticStrings;
381 }
382
383 #if DCHECK_IS_ON()
384 static bool s_allowCreationOfStaticStrings = true;
385 #endif
386
387 const StaticStringsTable& StringImpl::allStaticStrings() {
388 return staticStrings();
389 }
390
391 void StringImpl::freezeStaticStrings() {
392 DCHECK(isMainThread());
393
394 #if DCHECK_IS_ON()
395 s_allowCreationOfStaticStrings = false;
396 #endif
397 }
398
399 unsigned StringImpl::m_highestStaticStringLength = 0;
400
401 DEFINE_GLOBAL(StringImpl, globalEmpty);
402 DEFINE_GLOBAL(StringImpl, globalEmpty16Bit);
403 // Callers need the global empty strings to be non-const.
404 StringImpl* StringImpl::empty = const_cast<StringImpl*>(&globalEmpty);
405 StringImpl* StringImpl::empty16Bit = const_cast<StringImpl*>(&globalEmpty16Bit);
406 void StringImpl::initStatics() {
407 new ((void*)empty) StringImpl(ConstructEmptyString);
408 new ((void*)empty16Bit) StringImpl(ConstructEmptyString16Bit);
409 WTF_ANNOTATE_BENIGN_RACE(StringImpl::empty,
410 "Benign race on the reference counter of a static "
411 "string created by StringImpl::empty");
412 WTF_ANNOTATE_BENIGN_RACE(StringImpl::empty16Bit,
413 "Benign race on the reference counter of a static "
414 "string created by StringImpl::empty16Bit");
415 }
416
417 StringImpl* StringImpl::createStatic(const char* string,
418 unsigned length,
419 unsigned hash) {
420 #if DCHECK_IS_ON()
421 DCHECK(s_allowCreationOfStaticStrings);
422 #endif
423 DCHECK(string);
424 DCHECK(length);
425
426 StaticStringsTable::const_iterator it = staticStrings().find(hash);
427 if (it != staticStrings().end()) {
428 DCHECK(!memcmp(string, it->value + 1, length * sizeof(LChar)));
429 return it->value;
430 }
431
432 // Allocate a single buffer large enough to contain the StringImpl
433 // struct as well as the data which it contains. This removes one
434 // heap allocation from this call.
435 RELEASE_ASSERT(length <=
436 ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) /
437 sizeof(LChar)));
438 size_t size = sizeof(StringImpl) + length * sizeof(LChar);
439
440 WTF_INTERNAL_LEAK_SANITIZER_DISABLED_SCOPE;
441 StringImpl* impl = static_cast<StringImpl*>(
442 Partitions::bufferMalloc(size, "WTF::StringImpl"));
443
444 LChar* data = reinterpret_cast<LChar*>(impl + 1);
445 impl = new (impl) StringImpl(length, hash, StaticString);
446 memcpy(data, string, length * sizeof(LChar));
447 #if DCHECK_IS_ON()
448 impl->assertHashIsCorrect();
449 #endif
450
451 DCHECK(isMainThread());
452 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
453 staticStrings().insert(hash, impl);
454 WTF_ANNOTATE_BENIGN_RACE(impl,
455 "Benign race on the reference counter of a static "
456 "string created by StringImpl::createStatic");
457
458 return impl;
459 }
460
461 void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) {
462 #if DCHECK_IS_ON()
463 DCHECK(s_allowCreationOfStaticStrings);
464 #endif
465 staticStrings().reserveCapacityForSize(size);
466 }
467
468 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters,
469 unsigned length) {
470 if (!characters || !length)
471 return empty;
472
473 UChar* data;
474 RefPtr<StringImpl> string = createUninitialized(length, data);
475 memcpy(data, characters, length * sizeof(UChar));
476 return string.release();
477 }
478
479 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters,
480 unsigned length) {
481 if (!characters || !length)
482 return empty;
483
484 LChar* data;
485 RefPtr<StringImpl> string = createUninitialized(length, data);
486 memcpy(data, characters, length * sizeof(LChar));
487 return string.release();
488 }
489
490 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters,
491 unsigned length) {
492 if (!characters || !length)
493 return empty;
494
495 LChar* data;
496 RefPtr<StringImpl> string = createUninitialized(length, data);
497
498 for (size_t i = 0; i < length; ++i) {
499 if (characters[i] & 0xff00)
500 return create(characters, length);
501 data[i] = static_cast<LChar>(characters[i]);
502 }
503
504 return string.release();
505 }
506
507 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) {
508 if (!string)
509 return empty;
510 size_t length = strlen(reinterpret_cast<const char*>(string));
511 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
512 return create(string, length);
513 }
514
515 bool StringImpl::containsOnlyWhitespace() {
516 // FIXME: The definition of whitespace here includes a number of characters
517 // that are not whitespace from the point of view of LayoutText; I wonder if
518 // that's a problem in practice.
519 if (is8Bit()) {
520 for (unsigned i = 0; i < m_length; ++i) {
521 UChar c = characters8()[i];
522 if (!isASCIISpace(c))
523 return false;
524 }
525
526 return true;
527 }
528
529 for (unsigned i = 0; i < m_length; ++i) {
530 UChar c = characters16()[i];
531 if (!isASCIISpace(c))
532 return false;
533 }
534 return true;
535 }
536
537 PassRefPtr<StringImpl> StringImpl::substring(unsigned start,
538 unsigned length) const {
539 if (start >= m_length)
540 return empty;
541 unsigned maxLength = m_length - start;
542 if (length >= maxLength) {
543 // PassRefPtr has trouble dealing with const arguments. It should be updated
544 // so this const_cast is not necessary.
545 if (!start)
546 return const_cast<StringImpl*>(this);
547 length = maxLength;
548 }
549 if (is8Bit())
550 return create(characters8() + start, length);
551
552 return create(characters16() + start, length);
553 }
554
555 UChar32 StringImpl::characterStartingAt(unsigned i) {
556 if (is8Bit())
557 return characters8()[i];
558 if (U16_IS_SINGLE(characters16()[i]))
559 return characters16()[i];
560 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) &&
561 U16_IS_TRAIL(characters16()[i + 1]))
562 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
563 return 0;
564 }
565
566 unsigned StringImpl::copyTo(UChar* buffer,
567 unsigned start,
568 unsigned maxLength) const {
569 unsigned numberOfCharactersToCopy = std::min(length() - start, maxLength);
570 if (!numberOfCharactersToCopy)
571 return 0;
572 if (is8Bit())
573 copyChars(buffer, characters8() + start, numberOfCharactersToCopy);
574 else
575 copyChars(buffer, characters16() + start, numberOfCharactersToCopy);
576 return numberOfCharactersToCopy;
577 }
578
579 PassRefPtr<StringImpl> StringImpl::lowerASCII() {
580 // First scan the string for uppercase and non-ASCII characters:
581 if (is8Bit()) {
582 unsigned firstIndexToBeLowered = m_length;
583 for (unsigned i = 0; i < m_length; ++i) {
584 LChar ch = characters8()[i];
585 if (isASCIIUpper(ch)) {
586 firstIndexToBeLowered = i;
587 break;
588 }
589 }
590
591 // Nothing to do if the string is all ASCII with no uppercase.
592 if (firstIndexToBeLowered == m_length) {
593 return this;
594 }
595
596 LChar* data8;
597 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
598 memcpy(data8, characters8(), firstIndexToBeLowered);
599
600 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) {
601 LChar ch = characters8()[i];
602 data8[i] = isASCIIUpper(ch) ? toASCIILower(ch) : ch;
603 }
604 return newImpl.release();
605 }
606 bool noUpper = true;
607 UChar ored = 0;
608
609 const UChar* end = characters16() + m_length;
610 for (const UChar* chp = characters16(); chp != end; ++chp) {
611 if (isASCIIUpper(*chp))
612 noUpper = false;
613 ored |= *chp;
614 }
615 // Nothing to do if the string is all ASCII with no uppercase.
616 if (noUpper && !(ored & ~0x7F))
617 return this;
618
619 RELEASE_ASSERT(m_length <=
620 static_cast<unsigned>(numeric_limits<unsigned>::max()));
621 unsigned length = m_length;
622
623 UChar* data16;
624 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
625
626 for (unsigned i = 0; i < length; ++i) {
627 UChar c = characters16()[i];
628 data16[i] = isASCIIUpper(c) ? toASCIILower(c) : c;
629 }
630 return newImpl.release();
631 }
632
633 PassRefPtr<StringImpl> StringImpl::lower() {
634 // Note: This is a hot function in the Dromaeo benchmark, specifically the
635 // no-op code path up through the first 'return' statement.
636
637 // First scan the string for uppercase and non-ASCII characters:
638 if (is8Bit()) {
639 unsigned firstIndexToBeLowered = m_length;
640 for (unsigned i = 0; i < m_length; ++i) {
641 LChar ch = characters8()[i];
642 if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) {
643 firstIndexToBeLowered = i;
644 break;
645 }
646 }
647
648 // Nothing to do if the string is all ASCII with no uppercase.
649 if (firstIndexToBeLowered == m_length)
650 return this;
651
652 LChar* data8;
653 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
654 memcpy(data8, characters8(), firstIndexToBeLowered);
655
656 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) {
657 LChar ch = characters8()[i];
658 data8[i] = UNLIKELY(ch & ~0x7F) ? static_cast<LChar>(Unicode::toLower(ch))
659 : toASCIILower(ch);
660 }
661
662 return newImpl.release();
663 }
664
665 bool noUpper = true;
666 UChar ored = 0;
667
668 const UChar* end = characters16() + m_length;
669 for (const UChar* chp = characters16(); chp != end; ++chp) {
670 if (UNLIKELY(isASCIIUpper(*chp)))
671 noUpper = false;
672 ored |= *chp;
673 }
674 // Nothing to do if the string is all ASCII with no uppercase.
675 if (noUpper && !(ored & ~0x7F))
676 return this;
677
678 RELEASE_ASSERT(m_length <=
679 static_cast<unsigned>(numeric_limits<int32_t>::max()));
680 int32_t length = m_length;
681
682 if (!(ored & ~0x7F)) {
683 UChar* data16;
684 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
685
686 for (int32_t i = 0; i < length; ++i) {
687 UChar c = characters16()[i];
688 data16[i] = toASCIILower(c);
689 }
690 return newImpl.release();
691 }
692
693 // Do a slower implementation for cases that include non-ASCII characters.
694 UChar* data16;
695 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
696
697 bool error;
698 int32_t realLength =
699 Unicode::toLower(data16, length, characters16(), m_length, &error);
700 if (!error && realLength == length)
701 return newImpl.release();
702
703 newImpl = createUninitialized(realLength, data16);
704 Unicode::toLower(data16, realLength, characters16(), m_length, &error);
705 if (error)
706 return this;
707 return newImpl.release();
708 }
709
710 PassRefPtr<StringImpl> StringImpl::upper() {
711 // This function could be optimized for no-op cases the way lower() is,
712 // but in empirical testing, few actual calls to upper() are no-ops, so
713 // it wouldn't be worth the extra time for pre-scanning.
714
715 RELEASE_ASSERT(m_length <=
716 static_cast<unsigned>(numeric_limits<int32_t>::max()));
717 int32_t length = m_length;
718
719 if (is8Bit()) {
720 LChar* data8;
721 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
722
723 // Do a faster loop for the case where all the characters are ASCII.
724 LChar ored = 0;
725 for (int i = 0; i < length; ++i) {
726 LChar c = characters8()[i];
727 ored |= c;
728 data8[i] = toASCIIUpper(c);
729 }
730 if (!(ored & ~0x7F))
731 return newImpl.release();
732
733 // Do a slower implementation for cases that include non-ASCII Latin-1
734 // characters.
735 int numberSharpSCharacters = 0;
736
737 // There are two special cases.
738 // 1. latin-1 characters when converted to upper case are 16 bit
739 // characters.
740 // 2. Lower case sharp-S converts to "SS" (two characters)
741 for (int32_t i = 0; i < length; ++i) {
742 LChar c = characters8()[i];
743 if (UNLIKELY(c == smallLetterSharpSCharacter))
744 ++numberSharpSCharacters;
745 UChar upper = static_cast<UChar>(Unicode::toUpper(c));
746 if (UNLIKELY(upper > 0xff)) {
747 // Since this upper-cased character does not fit in an 8-bit string, we
748 // need to take the 16-bit path.
749 goto upconvert;
750 }
751 data8[i] = static_cast<LChar>(upper);
752 }
753
754 if (!numberSharpSCharacters)
755 return newImpl.release();
756
757 // We have numberSSCharacters sharp-s characters, but none of the other
758 // special characters.
759 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
760
761 LChar* dest = data8;
762
763 for (int32_t i = 0; i < length; ++i) {
764 LChar c = characters8()[i];
765 if (c == smallLetterSharpSCharacter) {
766 *dest++ = 'S';
767 *dest++ = 'S';
768 } else {
769 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
770 }
771 }
772
773 return newImpl.release();
774 }
775
776 upconvert:
777 RefPtr<StringImpl> upconverted = upconvertedString();
778 const UChar* source16 = upconverted->characters16();
779
780 UChar* data16;
781 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
782
783 // Do a faster loop for the case where all the characters are ASCII.
784 UChar ored = 0;
785 for (int i = 0; i < length; ++i) {
786 UChar c = source16[i];
787 ored |= c;
788 data16[i] = toASCIIUpper(c);
789 }
790 if (!(ored & ~0x7F))
791 return newImpl.release();
792
793 // Do a slower implementation for cases that include non-ASCII characters.
794 bool error;
795 int32_t realLength =
796 Unicode::toUpper(data16, length, source16, m_length, &error);
797 if (!error && realLength == length)
798 return newImpl;
799 newImpl = createUninitialized(realLength, data16);
800 Unicode::toUpper(data16, realLength, source16, m_length, &error);
801 if (error)
802 return this;
803 return newImpl.release();
804 }
805
806 PassRefPtr<StringImpl> StringImpl::upperASCII() {
807 if (is8Bit()) {
808 LChar* data8;
809 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
810
811 for (unsigned i = 0; i < m_length; ++i) {
812 LChar c = characters8()[i];
813 data8[i] = isASCIILower(c) ? toASCIIUpper(c) : c;
814 }
815 return newImpl.release();
816 }
817
818 UChar* data16;
819 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
820
821 for (unsigned i = 0; i < m_length; ++i) {
822 UChar c = characters16()[i];
823 data16[i] = isASCIILower(c) ? toASCIIUpper(c) : c;
824 }
825 return newImpl.release();
826 }
827
828 static inline bool localeIdMatchesLang(const AtomicString& localeId,
829 const StringView& lang) {
830 RELEASE_ASSERT(lang.length() >= 2 && lang.length() <= 3);
831 if (!localeId.impl() || !localeId.impl()->startsWithIgnoringCase(lang))
832 return false;
833 if (localeId.impl()->length() == lang.length())
834 return true;
835 const UChar maybeDelimiter = (*localeId.impl())[lang.length()];
836 return maybeDelimiter == '-' || maybeDelimiter == '_' ||
837 maybeDelimiter == '@';
838 }
839
840 typedef int32_t (*icuCaseConverter)(UChar*,
841 int32_t,
842 const UChar*,
843 int32_t,
844 const char*,
845 UErrorCode*);
846
847 static PassRefPtr<StringImpl> caseConvert(const UChar* source16,
848 size_t length,
849 icuCaseConverter converter,
850 const char* locale,
851 StringImpl* originalString) {
852 UChar* data16;
853 size_t targetLength = length;
854 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);
855 do {
856 UErrorCode status = U_ZERO_ERROR;
857 targetLength =
858 converter(data16, targetLength, source16, length, locale, &status);
859 if (U_SUCCESS(status)) {
860 if (length > 0)
861 return output->substring(0, targetLength);
862 return output.release();
863 }
864 if (status != U_BUFFER_OVERFLOW_ERROR)
865 return originalString;
866 // Expand the buffer.
867 output = StringImpl::createUninitialized(targetLength, data16);
868 } while (true);
869 }
870
871 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) {
872 // Use the more optimized code path most of the time.
873 // Only Turkic (tr and az) languages and Lithuanian requires
874 // locale-specific lowercasing rules. Even though CLDR has el-Lower,
875 // it's identical to the locale-agnostic lowercasing. Context-dependent
876 // handling of Greek capital sigma is built into the common lowercasing
877 // function in ICU.
878 const char* localeForConversion = 0;
879 if (localeIdMatchesLang(localeIdentifier, "tr") ||
880 localeIdMatchesLang(localeIdentifier, "az"))
881 localeForConversion = "tr";
882 else if (localeIdMatchesLang(localeIdentifier, "lt"))
883 localeForConversion = "lt";
884 else
885 return lower();
886
887 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
888 CRASH();
889 int length = m_length;
890
891 RefPtr<StringImpl> upconverted = upconvertedString();
892 const UChar* source16 = upconverted->characters16();
893 return caseConvert(source16, length, u_strToLower, localeForConversion, this);
894 }
895
896 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) {
897 // Use the more-optimized code path most of the time.
898 // Only Turkic (tr and az) languages, Greek and Lithuanian require
899 // locale-specific uppercasing rules.
900 const char* localeForConversion = 0;
901 if (localeIdMatchesLang(localeIdentifier, "tr") ||
902 localeIdMatchesLang(localeIdentifier, "az"))
903 localeForConversion = "tr";
904 else if (localeIdMatchesLang(localeIdentifier, "el"))
905 localeForConversion = "el";
906 else if (localeIdMatchesLang(localeIdentifier, "lt"))
907 localeForConversion = "lt";
908 else
909 return upper();
910
911 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
912 CRASH();
913 int length = m_length;
914
915 RefPtr<StringImpl> upconverted = upconvertedString();
916 const UChar* source16 = upconverted->characters16();
917
918 return caseConvert(source16, length, u_strToUpper, localeForConversion, this);
919 }
920
921 PassRefPtr<StringImpl> StringImpl::fill(UChar character) {
922 if (!(character & ~0x7F)) {
923 LChar* data;
924 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
925 for (unsigned i = 0; i < m_length; ++i)
926 data[i] = static_cast<LChar>(character);
927 return newImpl.release();
928 }
929 UChar* data;
930 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
931 for (unsigned i = 0; i < m_length; ++i)
932 data[i] = character;
933 return newImpl.release();
934 }
935
936 PassRefPtr<StringImpl> StringImpl::foldCase() {
937 RELEASE_ASSERT(m_length <=
938 static_cast<unsigned>(numeric_limits<int32_t>::max()));
939 int32_t length = m_length;
940
941 if (is8Bit()) {
942 // Do a faster loop for the case where all the characters are ASCII.
943 LChar* data;
944 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
945 LChar ored = 0;
946
947 for (int32_t i = 0; i < length; ++i) {
948 LChar c = characters8()[i];
949 data[i] = toASCIILower(c);
950 ored |= c;
951 }
952
953 if (!(ored & ~0x7F))
954 return newImpl.release();
955
956 // Do a slower implementation for cases that include non-ASCII Latin-1
957 // characters.
958 for (int32_t i = 0; i < length; ++i)
959 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
960
961 return newImpl.release();
962 }
963
964 // Do a faster loop for the case where all the characters are ASCII.
965 UChar* data;
966 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
967 UChar ored = 0;
968 for (int32_t i = 0; i < length; ++i) {
969 UChar c = characters16()[i];
970 ored |= c;
971 data[i] = toASCIILower(c);
972 }
973 if (!(ored & ~0x7F))
974 return newImpl.release();
975
976 // Do a slower implementation for cases that include non-ASCII characters.
977 bool error;
978 int32_t realLength =
979 Unicode::foldCase(data, length, characters16(), m_length, &error);
980 if (!error && realLength == length)
981 return newImpl.release();
982 newImpl = createUninitialized(realLength, data);
983 Unicode::foldCase(data, realLength, characters16(), m_length, &error);
984 if (error)
985 return this;
986 return newImpl.release();
987 }
988
989 PassRefPtr<StringImpl> StringImpl::truncate(unsigned length) {
990 if (length >= m_length)
991 return this;
992 if (is8Bit())
993 return create(characters8(), length);
994 return create(characters16(), length);
995 }
996
997 template <class UCharPredicate>
998 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(
999 UCharPredicate predicate) {
1000 if (!m_length)
1001 return empty;
1002
1003 unsigned start = 0;
1004 unsigned end = m_length - 1;
1005
1006 // skip white space from start
1007 while (start <= end &&
1008 predicate(is8Bit() ? characters8()[start] : characters16()[start]))
1009 ++start;
1010
1011 // only white space
1012 if (start > end)
1013 return empty;
1014
1015 // skip white space from end
1016 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
1017 --end;
1018
1019 if (!start && end == m_length - 1)
1020 return this;
1021 if (is8Bit())
1022 return create(characters8() + start, end + 1 - start);
1023 return create(characters16() + start, end + 1 - start);
1024 }
1025
1026 class UCharPredicate final {
1027 STACK_ALLOCATED();
1028
1029 public:
1030 inline UCharPredicate(CharacterMatchFunctionPtr function)
1031 : m_function(function) {}
1032
1033 inline bool operator()(UChar ch) const { return m_function(ch); }
1034
1035 private:
1036 const CharacterMatchFunctionPtr m_function;
1037 };
1038
1039 class SpaceOrNewlinePredicate final {
1040 STACK_ALLOCATED();
1041
1042 public:
1043 inline bool operator()(UChar ch) const { return isSpaceOrNewline(ch); }
1044 };
1045
1046 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() {
1047 return stripMatchedCharacters(SpaceOrNewlinePredicate());
1048 }
1049
1050 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(
1051 IsWhiteSpaceFunctionPtr isWhiteSpace) {
1052 return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
1053 }
1054
1055 template <typename CharType>
1056 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(
1057 const CharType* characters,
1058 CharacterMatchFunctionPtr findMatch) {
1059 const CharType* from = characters;
1060 const CharType* fromend = from + m_length;
1061
1062 // Assume the common case will not remove any characters
1063 while (from != fromend && !findMatch(*from))
1064 ++from;
1065 if (from == fromend)
1066 return this;
1067
1068 StringBuffer<CharType> data(m_length);
1069 CharType* to = data.characters();
1070 unsigned outc = from - characters;
1071
1072 if (outc)
1073 memcpy(to, characters, outc * sizeof(CharType));
1074
1075 while (true) {
1076 while (from != fromend && findMatch(*from))
1077 ++from;
1078 while (from != fromend && !findMatch(*from))
1079 to[outc++] = *from++;
1080 if (from == fromend)
1081 break;
1082 }
1083
1084 data.shrink(outc);
1085
1086 return data.release();
1087 }
1088
1089 PassRefPtr<StringImpl> StringImpl::removeCharacters(
1090 CharacterMatchFunctionPtr findMatch) {
1091 if (is8Bit())
1092 return removeCharacters(characters8(), findMatch);
1093 return removeCharacters(characters16(), findMatch);
1094 }
1095
1096 PassRefPtr<StringImpl> StringImpl::remove(unsigned start,
1097 unsigned lengthToRemove) {
1098 if (lengthToRemove <= 0)
1099 return this;
1100 if (start >= m_length)
1101 return this;
1102
1103 lengthToRemove = std::min(m_length - start, lengthToRemove);
1104 unsigned removedEnd = start + lengthToRemove;
1105
1106 if (is8Bit()) {
1107 StringBuffer<LChar> buffer(m_length - lengthToRemove);
1108 copyChars(buffer.characters(), characters8(), start);
1109 copyChars(buffer.characters() + start, characters8() + removedEnd,
1110 m_length - removedEnd);
1111 return buffer.release();
1112 }
1113 StringBuffer<UChar> buffer(m_length - lengthToRemove);
1114 copyChars(buffer.characters(), characters16(), start);
1115 copyChars(buffer.characters() + start, characters16() + removedEnd,
1116 m_length - removedEnd);
1117 return buffer.release();
1118 }
1119
1120 template <typename CharType, class UCharPredicate>
1121 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(
1122 UCharPredicate predicate,
1123 StripBehavior stripBehavior) {
1124 StringBuffer<CharType> data(m_length);
1125
1126 const CharType* from = getCharacters<CharType>();
1127 const CharType* fromend = from + m_length;
1128 int outc = 0;
1129 bool changedToSpace = false;
1130
1131 CharType* to = data.characters();
1132
1133 if (stripBehavior == StripExtraWhiteSpace) {
1134 while (true) {
1135 while (from != fromend && predicate(*from)) {
1136 if (*from != ' ')
1137 changedToSpace = true;
1138 ++from;
1139 }
1140 while (from != fromend && !predicate(*from))
1141 to[outc++] = *from++;
1142 if (from != fromend)
1143 to[outc++] = ' ';
1144 else
1145 break;
1146 }
1147
1148 if (outc > 0 && to[outc - 1] == ' ')
1149 --outc;
1150 } else {
1151 for (; from != fromend; ++from) {
1152 if (predicate(*from)) {
1153 if (*from != ' ')
1154 changedToSpace = true;
1155 to[outc++] = ' ';
1156 } else {
1157 to[outc++] = *from;
1158 }
1159 }
1160 }
1161
1162 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
1163 return this;
1164
1165 data.shrink(outc);
1166
1167 return data.release();
1168 }
1169
1170 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(
1171 StripBehavior stripBehavior) {
1172 if (is8Bit())
1173 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(
1174 SpaceOrNewlinePredicate(), stripBehavior);
1175 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(
1176 SpaceOrNewlinePredicate(), stripBehavior);
1177 }
1178
1179 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(
1180 IsWhiteSpaceFunctionPtr isWhiteSpace,
1181 StripBehavior stripBehavior) {
1182 if (is8Bit())
1183 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(
1184 UCharPredicate(isWhiteSpace), stripBehavior);
1185 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(
1186 UCharPredicate(isWhiteSpace), stripBehavior);
1187 }
1188
1189 int StringImpl::toIntStrict(bool* ok, int base) {
1190 if (is8Bit())
1191 return charactersToIntStrict(characters8(), m_length, ok, base);
1192 return charactersToIntStrict(characters16(), m_length, ok, base);
1193 }
1194
1195 unsigned StringImpl::toUIntStrict(bool* ok, int base) {
1196 if (is8Bit())
1197 return charactersToUIntStrict(characters8(), m_length, ok, base);
1198 return charactersToUIntStrict(characters16(), m_length, ok, base);
1199 }
1200
1201 int64_t StringImpl::toInt64Strict(bool* ok, int base) {
1202 if (is8Bit())
1203 return charactersToInt64Strict(characters8(), m_length, ok, base);
1204 return charactersToInt64Strict(characters16(), m_length, ok, base);
1205 }
1206
1207 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) {
1208 if (is8Bit())
1209 return charactersToUInt64Strict(characters8(), m_length, ok, base);
1210 return charactersToUInt64Strict(characters16(), m_length, ok, base);
1211 }
1212
1213 int StringImpl::toInt(bool* ok) {
1214 if (is8Bit())
1215 return charactersToInt(characters8(), m_length, ok);
1216 return charactersToInt(characters16(), m_length, ok);
1217 }
1218
1219 unsigned StringImpl::toUInt(bool* ok) {
1220 if (is8Bit())
1221 return charactersToUInt(characters8(), m_length, ok);
1222 return charactersToUInt(characters16(), m_length, ok);
1223 }
1224
1225 int64_t StringImpl::toInt64(bool* ok) {
1226 if (is8Bit())
1227 return charactersToInt64(characters8(), m_length, ok);
1228 return charactersToInt64(characters16(), m_length, ok);
1229 }
1230
1231 uint64_t StringImpl::toUInt64(bool* ok) {
1232 if (is8Bit())
1233 return charactersToUInt64(characters8(), m_length, ok);
1234 return charactersToUInt64(characters16(), m_length, ok);
1235 }
1236
1237 double StringImpl::toDouble(bool* ok) {
1238 if (is8Bit())
1239 return charactersToDouble(characters8(), m_length, ok);
1240 return charactersToDouble(characters16(), m_length, ok);
1241 }
1242
1243 float StringImpl::toFloat(bool* ok) {
1244 if (is8Bit())
1245 return charactersToFloat(characters8(), m_length, ok);
1246 return charactersToFloat(characters16(), m_length, ok);
1247 }
1248
1249 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt
//
// Lookup table indexed by an 8-bit (Latin-1) code unit, 0x00-0xff. Reading
// the entries:
//   - 0x41-0x5a map to 0x61-0x7a;
//   - 0xc0-0xde map to 0xe0-0xfe, except 0xd7, which maps to itself;
//   - 0xb5 maps to 0x03bc, the only entry that does not fit in 8 bits
//     (presumably why the element type is UChar rather than LChar);
//   - every other index maps to itself.
// The equalIgnoringCase() overloads in this file that take an LChar buffer
// compare entries of this table rather than the raw code units.
1250 const UChar StringImpl::latin1CaseFoldTable[256] = {
1251 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
1252 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011,
1253 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a,
1254 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
1255 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c,
1256 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035,
1257 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e,
1258 0x003f, 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
1259 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070,
1260 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079,
1261 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062,
1262 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b,
1263 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074,
1264 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d,
1265 0x007e, 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086,
1266 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
1267 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098,
1268 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1,
1269 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa,
1270 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3,
1271 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc,
1272 0x00bd, 0x00be, 0x00bf, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5,
1273 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee,
1274 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7,
1275 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, 0x00e0,
1276 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9,
1277 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2,
1278 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb,
1279 0x00fc, 0x00fd, 0x00fe, 0x00ff,
1280 };
1281
1282 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) {
1283 DCHECK_GE(length, 0u);
1284 if (a == b)
1285 return true;
1286 while (length--) {
1287 if (StringImpl::latin1CaseFoldTable[*a++] !=
1288 StringImpl::latin1CaseFoldTable[*b++])
1289 return false;
1290 }
1291 return true;
1292 }
1293
1294 bool equalIgnoringCase(const UChar* a, const UChar* b, unsigned length) {
1295 DCHECK_GE(length, 0u);
1296 if (a == b)
1297 return true;
1298 return !Unicode::umemcasecmp(a, b, length);
1299 }
1300
1301 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) {
1302 while (length--) {
1303 if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++])
1304 return false;
1305 }
1306 return true;
1307 }
1308
1309 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction,
1310 unsigned start) {
1311 if (is8Bit())
1312 return WTF::find(characters8(), m_length, matchFunction, start);
1313 return WTF::find(characters16(), m_length, matchFunction, start);
1314 }
1315
1316 template <typename SearchCharacterType, typename MatchCharacterType>
1317 ALWAYS_INLINE static size_t findInternal(
1318 const SearchCharacterType* searchCharacters,
1319 const MatchCharacterType* matchCharacters,
1320 unsigned index,
1321 unsigned searchLength,
1322 unsigned matchLength) {
1323 // Optimization: keep a running hash of the strings,
1324 // only call equal() if the hashes match.
1325
1326 // delta is the number of additional times to test; delta == 0 means test only
1327 // once.
1328 unsigned delta = searchLength - matchLength;
1329
1330 unsigned searchHash = 0;
1331 unsigned matchHash = 0;
1332
1333 for (unsigned i = 0; i < matchLength; ++i) {
1334 searchHash += searchCharacters[i];
1335 matchHash += matchCharacters[i];
1336 }
1337
1338 unsigned i = 0;
1339 // keep looping until we match
1340 while (searchHash != matchHash ||
1341 !equal(searchCharacters + i, matchCharacters, matchLength)) {
1342 if (i == delta)
1343 return kNotFound;
1344 searchHash += searchCharacters[i + matchLength];
1345 searchHash -= searchCharacters[i];
1346 ++i;
1347 }
1348 return index + i;
1349 }
1350
1351 size_t StringImpl::find(const StringView& matchString, unsigned index) {
1352 if (UNLIKELY(matchString.isNull()))
1353 return kNotFound;
1354
1355 unsigned matchLength = matchString.length();
1356
1357 // Optimization 1: fast case for strings of length 1.
1358 if (matchLength == 1) {
1359 if (is8Bit())
1360 return WTF::find(characters8(), length(), matchString[0], index);
1361 return WTF::find(characters16(), length(), matchString[0], index);
1362 }
1363
1364 if (UNLIKELY(!matchLength))
1365 return min(index, length());
1366
1367 // Check index & matchLength are in range.
1368 if (index > length())
1369 return kNotFound;
1370 unsigned searchLength = length() - index;
1371 if (matchLength > searchLength)
1372 return kNotFound;
1373
1374 if (is8Bit()) {
1375 if (matchString.is8Bit())
1376 return findInternal(characters8() + index, matchString.characters8(),
1377 index, searchLength, matchLength);
1378 return findInternal(characters8() + index, matchString.characters16(),
1379 index, searchLength, matchLength);
1380 }
1381 if (matchString.is8Bit())
1382 return findInternal(characters16() + index, matchString.characters8(),
1383 index, searchLength, matchLength);
1384 return findInternal(characters16() + index, matchString.characters16(), index,
1385 searchLength, matchLength);
1386 }
1387
1388 template <typename SearchCharacterType, typename MatchCharacterType>
1389 ALWAYS_INLINE static size_t findIgnoringCaseInternal(
1390 const SearchCharacterType* searchCharacters,
1391 const MatchCharacterType* matchCharacters,
1392 unsigned index,
1393 unsigned searchLength,
1394 unsigned matchLength) {
1395 // delta is the number of additional times to test; delta == 0 means test only
1396 // once.
1397 unsigned delta = searchLength - matchLength;
1398
1399 unsigned i = 0;
1400 // keep looping until we match
1401 while (
1402 !equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
1403 if (i == delta)
1404 return kNotFound;
1405 ++i;
1406 }
1407 return index + i;
1408 }
1409
1410 size_t StringImpl::findIgnoringCase(const StringView& matchString,
1411 unsigned index) {
1412 if (UNLIKELY(matchString.isNull()))
1413 return kNotFound;
1414
1415 unsigned matchLength = matchString.length();
1416 if (!matchLength)
1417 return min(index, length());
1418
1419 // Check index & matchLength are in range.
1420 if (index > length())
1421 return kNotFound;
1422 unsigned searchLength = length() - index;
1423 if (matchLength > searchLength)
1424 return kNotFound;
1425
1426 if (is8Bit()) {
1427 if (matchString.is8Bit())
1428 return findIgnoringCaseInternal(characters8() + index,
1429 matchString.characters8(), index,
1430 searchLength, matchLength);
1431 return findIgnoringCaseInternal(characters8() + index,
1432 matchString.characters16(), index,
1433 searchLength, matchLength);
1434 }
1435 if (matchString.is8Bit())
1436 return findIgnoringCaseInternal(characters16() + index,
1437 matchString.characters8(), index,
1438 searchLength, matchLength);
1439 return findIgnoringCaseInternal(characters16() + index,
1440 matchString.characters16(), index,
1441 searchLength, matchLength);
1442 }
1443
1444 template <typename SearchCharacterType, typename MatchCharacterType>
1445 ALWAYS_INLINE static size_t findIgnoringASCIICaseInternal(
1446 const SearchCharacterType* searchCharacters,
1447 const MatchCharacterType* matchCharacters,
1448 unsigned index,
1449 unsigned searchLength,
1450 unsigned matchLength) {
1451 // delta is the number of additional times to test; delta == 0 means test only
1452 // once.
1453 unsigned delta = searchLength - matchLength;
1454
1455 unsigned i = 0;
1456 // keep looping until we match
1457 while (!equalIgnoringASCIICase(searchCharacters + i, matchCharacters,
1458 matchLength)) {
1459 if (i == delta)
1460 return kNotFound;
1461 ++i;
1462 }
1463 return index + i;
1464 }
1465
1466 size_t StringImpl::findIgnoringASCIICase(const StringView& matchString,
1467 unsigned index) {
1468 if (UNLIKELY(matchString.isNull()))
1469 return kNotFound;
1470
1471 unsigned matchLength = matchString.length();
1472 if (!matchLength)
1473 return min(index, length());
1474
1475 // Check index & matchLength are in range.
1476 if (index > length())
1477 return kNotFound;
1478 unsigned searchLength = length() - index;
1479 if (matchLength > searchLength)
1480 return kNotFound;
1481
1482 if (is8Bit()) {
1483 if (matchString.is8Bit())
1484 return findIgnoringASCIICaseInternal(characters8() + index,
1485 matchString.characters8(), index,
1486 searchLength, matchLength);
1487 return findIgnoringASCIICaseInternal(characters8() + index,
1488 matchString.characters16(), index,
1489 searchLength, matchLength);
1490 }
1491 if (matchString.is8Bit())
1492 return findIgnoringASCIICaseInternal(characters16() + index,
1493 matchString.characters8(), index,
1494 searchLength, matchLength);
1495 return findIgnoringASCIICaseInternal(characters16() + index,
1496 matchString.characters16(), index,
1497 searchLength, matchLength);
1498 }
1499
1500 size_t StringImpl::reverseFind(UChar c, unsigned index) {
1501 if (is8Bit())
1502 return WTF::reverseFind(characters8(), m_length, c, index);
1503 return WTF::reverseFind(characters16(), m_length, c, index);
1504 }
1505
1506 template <typename SearchCharacterType, typename MatchCharacterType>
1507 ALWAYS_INLINE static size_t reverseFindInternal(
1508 const SearchCharacterType* searchCharacters,
1509 const MatchCharacterType* matchCharacters,
1510 unsigned index,
1511 unsigned length,
1512 unsigned matchLength) {
1513 // Optimization: keep a running hash of the strings,
1514 // only call equal if the hashes match.
1515
1516 // delta is the number of additional times to test; delta == 0 means test only
1517 // once.
1518 unsigned delta = min(index, length - matchLength);
1519
1520 unsigned searchHash = 0;
1521 unsigned matchHash = 0;
1522 for (unsigned i = 0; i < matchLength; ++i) {
1523 searchHash += searchCharacters[delta + i];
1524 matchHash += matchCharacters[i];
1525 }
1526
1527 // keep looping until we match
1528 while (searchHash != matchHash ||
1529 !equal(searchCharacters + delta, matchCharacters, matchLength)) {
1530 if (!delta)
1531 return kNotFound;
1532 --delta;
1533 searchHash -= searchCharacters[delta + matchLength];
1534 searchHash += searchCharacters[delta];
1535 }
1536 return delta;
1537 }
1538
1539 size_t StringImpl::reverseFind(const StringView& matchString, unsigned index) {
1540 if (UNLIKELY(matchString.isNull()))
1541 return kNotFound;
1542
1543 unsigned matchLength = matchString.length();
1544 unsigned ourLength = length();
1545 if (!matchLength)
1546 return min(index, ourLength);
1547
1548 // Optimization 1: fast case for strings of length 1.
1549 if (matchLength == 1) {
1550 if (is8Bit())
1551 return WTF::reverseFind(characters8(), ourLength, matchString[0], index);
1552 return WTF::reverseFind(characters16(), ourLength, matchString[0], index);
1553 }
1554
1555 // Check index & matchLength are in range.
1556 if (matchLength > ourLength)
1557 return kNotFound;
1558
1559 if (is8Bit()) {
1560 if (matchString.is8Bit())
1561 return reverseFindInternal(characters8(), matchString.characters8(),
1562 index, ourLength, matchLength);
1563 return reverseFindInternal(characters8(), matchString.characters16(), index,
1564 ourLength, matchLength);
1565 }
1566 if (matchString.is8Bit())
1567 return reverseFindInternal(characters16(), matchString.characters8(), index,
1568 ourLength, matchLength);
1569 return reverseFindInternal(characters16(), matchString.characters16(), index,
1570 ourLength, matchLength);
1571 }
1572
1573 bool StringImpl::startsWith(UChar character) const {
1574 return m_length && (*this)[0] == character;
1575 }
1576
1577 bool StringImpl::startsWith(const StringView& prefix) const {
1578 if (prefix.length() > length())
1579 return false;
1580 if (is8Bit()) {
1581 if (prefix.is8Bit())
1582 return equal(characters8(), prefix.characters8(), prefix.length());
1583 return equal(characters8(), prefix.characters16(), prefix.length());
1584 }
1585 if (prefix.is8Bit())
1586 return equal(characters16(), prefix.characters8(), prefix.length());
1587 return equal(characters16(), prefix.characters16(), prefix.length());
1588 }
1589
1590 bool StringImpl::startsWithIgnoringCase(const StringView& prefix) const {
1591 if (prefix.length() > length())
1592 return false;
1593 if (is8Bit()) {
1594 if (prefix.is8Bit())
1595 return equalIgnoringCase(characters8(), prefix.characters8(),
1596 prefix.length());
1597 return equalIgnoringCase(characters8(), prefix.characters16(),
1598 prefix.length());
1599 }
1600 if (prefix.is8Bit())
1601 return equalIgnoringCase(characters16(), prefix.characters8(),
1602 prefix.length());
1603 return equalIgnoringCase(characters16(), prefix.characters16(),
1604 prefix.length());
1605 }
1606
1607 bool StringImpl::startsWithIgnoringASCIICase(const StringView& prefix) const {
1608 if (prefix.length() > length())
1609 return false;
1610 if (is8Bit()) {
1611 if (prefix.is8Bit())
1612 return equalIgnoringASCIICase(characters8(), prefix.characters8(),
1613 prefix.length());
1614 return equalIgnoringASCIICase(characters8(), prefix.characters16(),
1615 prefix.length());
1616 }
1617 if (prefix.is8Bit())
1618 return equalIgnoringASCIICase(characters16(), prefix.characters8(),
1619 prefix.length());
1620 return equalIgnoringASCIICase(characters16(), prefix.characters16(),
1621 prefix.length());
1622 }
1623
1624 bool StringImpl::endsWith(UChar character) const {
1625 return m_length && (*this)[m_length - 1] == character;
1626 }
1627
1628 bool StringImpl::endsWith(const StringView& suffix) const {
1629 if (suffix.length() > length())
1630 return false;
1631 unsigned startOffset = length() - suffix.length();
1632 if (is8Bit()) {
1633 if (suffix.is8Bit())
1634 return equal(characters8() + startOffset, suffix.characters8(),
1635 suffix.length());
1636 return equal(characters8() + startOffset, suffix.characters16(),
1637 suffix.length());
1638 }
1639 if (suffix.is8Bit())
1640 return equal(characters16() + startOffset, suffix.characters8(),
1641 suffix.length());
1642 return equal(characters16() + startOffset, suffix.characters16(),
1643 suffix.length());
1644 }
1645
1646 bool StringImpl::endsWithIgnoringCase(const StringView& suffix) const {
1647 if (suffix.length() > length())
1648 return false;
1649 unsigned startOffset = length() - suffix.length();
1650 if (is8Bit()) {
1651 if (suffix.is8Bit())
1652 return equalIgnoringCase(characters8() + startOffset,
1653 suffix.characters8(), suffix.length());
1654 return equalIgnoringCase(characters8() + startOffset, suffix.characters16(),
1655 suffix.length());
1656 }
1657 if (suffix.is8Bit())
1658 return equalIgnoringCase(characters16() + startOffset, suffix.characters8(),
1659 suffix.length());
1660 return equalIgnoringCase(characters16() + startOffset, suffix.characters16(),
1661 suffix.length());
1662 }
1663
1664 bool StringImpl::endsWithIgnoringASCIICase(const StringView& suffix) const {
1665 if (suffix.length() > length())
1666 return false;
1667 unsigned startOffset = length() - suffix.length();
1668 if (is8Bit()) {
1669 if (suffix.is8Bit())
1670 return equalIgnoringASCIICase(characters8() + startOffset,
1671 suffix.characters8(), suffix.length());
1672 return equalIgnoringASCIICase(characters8() + startOffset,
1673 suffix.characters16(), suffix.length());
1674 }
1675 if (suffix.is8Bit())
1676 return equalIgnoringASCIICase(characters16() + startOffset,
1677 suffix.characters8(), suffix.length());
1678 return equalIgnoringASCIICase(characters16() + startOffset,
1679 suffix.characters16(), suffix.length());
1680 }
1681
1682 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) {
1683 if (oldC == newC)
1684 return this;
1685
1686 if (find(oldC) == kNotFound)
1687 return this;
1688
1689 unsigned i;
1690 if (is8Bit()) {
1691 if (newC <= 0xff) {
1692 LChar* data;
1693 LChar oldChar = static_cast<LChar>(oldC);
1694 LChar newChar = static_cast<LChar>(newC);
1695
1696 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1697
1698 for (i = 0; i != m_length; ++i) {
1699 LChar ch = characters8()[i];
1700 if (ch == oldChar)
1701 ch = newChar;
1702 data[i] = ch;
1703 }
1704 return newImpl.release();
1705 }
1706
1707 // There is the possibility we need to up convert from 8 to 16 bit,
1708 // create a 16 bit string for the result.
1709 UChar* data;
1710 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1711
1712 for (i = 0; i != m_length; ++i) {
1713 UChar ch = characters8()[i];
1714 if (ch == oldC)
1715 ch = newC;
1716 data[i] = ch;
1717 }
1718
1719 return newImpl.release();
1720 }
1721
1722 UChar* data;
1723 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1724
1725 for (i = 0; i != m_length; ++i) {
1726 UChar ch = characters16()[i];
1727 if (ch == oldC)
1728 ch = newC;
1729 data[i] = ch;
1730 }
1731 return newImpl.release();
1732 }
1733
1734 // TODO(esprehn): Passing a null replacement is the same as empty string for
1735 // this method but all others treat null as a no-op. We should choose one
1736 // behavior.
1737 PassRefPtr<StringImpl> StringImpl::replace(unsigned position,
1738 unsigned lengthToReplace,
1739 const StringView& string) {
1740 position = min(position, length());
1741 lengthToReplace = min(lengthToReplace, length() - position);
1742 unsigned lengthToInsert = string.length();
1743 if (!lengthToReplace && !lengthToInsert)
1744 return this;
1745
1746 RELEASE_ASSERT((length() - lengthToReplace) <
1747 (numeric_limits<unsigned>::max() - lengthToInsert));
1748
1749 if (is8Bit() && (string.isNull() || string.is8Bit())) {
1750 LChar* data;
1751 RefPtr<StringImpl> newImpl =
1752 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1753 memcpy(data, characters8(), position * sizeof(LChar));
1754 if (!string.isNull())
1755 memcpy(data + position, string.characters8(),
1756 lengthToInsert * sizeof(LChar));
1757 memcpy(data + position + lengthToInsert,
1758 characters8() + position + lengthToReplace,
1759 (length() - position - lengthToReplace) * sizeof(LChar));
1760 return newImpl.release();
1761 }
1762 UChar* data;
1763 RefPtr<StringImpl> newImpl =
1764 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1765 if (is8Bit())
1766 for (unsigned i = 0; i < position; ++i)
1767 data[i] = characters8()[i];
1768 else
1769 memcpy(data, characters16(), position * sizeof(UChar));
1770 if (!string.isNull()) {
1771 if (string.is8Bit())
1772 for (unsigned i = 0; i < lengthToInsert; ++i)
1773 data[i + position] = string.characters8()[i];
1774 else
1775 memcpy(data + position, string.characters16(),
1776 lengthToInsert * sizeof(UChar));
1777 }
1778 if (is8Bit()) {
1779 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
1780 data[i + position + lengthToInsert] =
1781 characters8()[i + position + lengthToReplace];
1782 } else {
1783 memcpy(data + position + lengthToInsert,
1784 characters16() + position + lengthToReplace,
1785 (length() - position - lengthToReplace) * sizeof(UChar));
1786 }
1787 return newImpl.release();
1788 }
1789
1790 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern,
1791 const StringView& replacement) {
1792 if (replacement.isNull())
1793 return this;
1794 if (replacement.is8Bit())
1795 return replace(pattern, replacement.characters8(), replacement.length());
1796 return replace(pattern, replacement.characters16(), replacement.length());
1797 }
1798
1799 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern,
1800 const LChar* replacement,
1801 unsigned repStrLength) {
1802 DCHECK(replacement);
1803
1804 size_t srcSegmentStart = 0;
1805 unsigned matchCount = 0;
1806
1807 // Count the matches.
1808 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1809 ++matchCount;
1810 ++srcSegmentStart;
1811 }
1812
1813 // If we have 0 matches then we don't have to do any more work.
1814 if (!matchCount)
1815 return this;
1816
1817 RELEASE_ASSERT(!repStrLength ||
1818 matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1819
1820 unsigned replaceSize = matchCount * repStrLength;
1821 unsigned newSize = m_length - matchCount;
1822 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1823
1824 newSize += replaceSize;
1825
1826 // Construct the new data.
1827 size_t srcSegmentEnd;
1828 unsigned srcSegmentLength;
1829 srcSegmentStart = 0;
1830 unsigned dstOffset = 0;
1831
1832 if (is8Bit()) {
1833 LChar* data;
1834 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1835
1836 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1837 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1838 memcpy(data + dstOffset, characters8() + srcSegmentStart,
1839 srcSegmentLength * sizeof(LChar));
1840 dstOffset += srcSegmentLength;
1841 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
1842 dstOffset += repStrLength;
1843 srcSegmentStart = srcSegmentEnd + 1;
1844 }
1845
1846 srcSegmentLength = m_length - srcSegmentStart;
1847 memcpy(data + dstOffset, characters8() + srcSegmentStart,
1848 srcSegmentLength * sizeof(LChar));
1849
1850 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());
1851
1852 return newImpl.release();
1853 }
1854
1855 UChar* data;
1856 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1857
1858 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1859 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1860 memcpy(data + dstOffset, characters16() + srcSegmentStart,
1861 srcSegmentLength * sizeof(UChar));
1862
1863 dstOffset += srcSegmentLength;
1864 for (unsigned i = 0; i < repStrLength; ++i)
1865 data[i + dstOffset] = replacement[i];
1866
1867 dstOffset += repStrLength;
1868 srcSegmentStart = srcSegmentEnd + 1;
1869 }
1870
1871 srcSegmentLength = m_length - srcSegmentStart;
1872 memcpy(data + dstOffset, characters16() + srcSegmentStart,
1873 srcSegmentLength * sizeof(UChar));
1874
1875 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());
1876
1877 return newImpl.release();
1878 }
1879
1880 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern,
1881 const UChar* replacement,
1882 unsigned repStrLength) {
1883 DCHECK(replacement);
1884
1885 size_t srcSegmentStart = 0;
1886 unsigned matchCount = 0;
1887
1888 // Count the matches.
1889 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1890 ++matchCount;
1891 ++srcSegmentStart;
1892 }
1893
1894 // If we have 0 matches then we don't have to do any more work.
1895 if (!matchCount)
1896 return this;
1897
1898 RELEASE_ASSERT(!repStrLength ||
1899 matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1900
1901 unsigned replaceSize = matchCount * repStrLength;
1902 unsigned newSize = m_length - matchCount;
1903 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1904
1905 newSize += replaceSize;
1906
1907 // Construct the new data.
1908 size_t srcSegmentEnd;
1909 unsigned srcSegmentLength;
1910 srcSegmentStart = 0;
1911 unsigned dstOffset = 0;
1912
1913 if (is8Bit()) {
1914 UChar* data;
1915 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1916
1917 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1918 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1919 for (unsigned i = 0; i < srcSegmentLength; ++i)
1920 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1921
1922 dstOffset += srcSegmentLength;
1923 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1924
1925 dstOffset += repStrLength;
1926 srcSegmentStart = srcSegmentEnd + 1;
1927 }
1928
1929 srcSegmentLength = m_length - srcSegmentStart;
1930 for (unsigned i = 0; i < srcSegmentLength; ++i)
1931 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1932
1933 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());
1934
1935 return newImpl.release();
1936 }
1937
1938 UChar* data;
1939 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1940
1941 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1942 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1943 memcpy(data + dstOffset, characters16() + srcSegmentStart,
1944 srcSegmentLength * sizeof(UChar));
1945
1946 dstOffset += srcSegmentLength;
1947 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1948
1949 dstOffset += repStrLength;
1950 srcSegmentStart = srcSegmentEnd + 1;
1951 }
1952
1953 srcSegmentLength = m_length - srcSegmentStart;
1954 memcpy(data + dstOffset, characters16() + srcSegmentStart,
1955 srcSegmentLength * sizeof(UChar));
1956
1957 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());
1958
1959 return newImpl.release();
1960 }
1961
1962 PassRefPtr<StringImpl> StringImpl::replace(const StringView& pattern,
1963 const StringView& replacement) {
1964 if (pattern.isNull() || replacement.isNull())
1965 return this;
1966
1967 unsigned patternLength = pattern.length();
1968 if (!patternLength)
1969 return this;
1970
1971 unsigned repStrLength = replacement.length();
1972 size_t srcSegmentStart = 0;
1973 unsigned matchCount = 0;
1974
1975 // Count the matches.
1976 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1977 ++matchCount;
1978 srcSegmentStart += patternLength;
1979 }
1980
1981 // If we have 0 matches, we don't have to do any more work
1982 if (!matchCount)
1983 return this;
1984
1985 unsigned newSize = m_length - matchCount * patternLength;
1986 RELEASE_ASSERT(!repStrLength ||
1987 matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1988
1989 RELEASE_ASSERT(newSize <=
1990 (numeric_limits<unsigned>::max() - matchCount * repStrLength));
1991
1992 newSize += matchCount * repStrLength;
1993
1994 // Construct the new data
1995 size_t srcSegmentEnd;
1996 unsigned srcSegmentLength;
1997 srcSegmentStart = 0;
1998 unsigned dstOffset = 0;
1999 bool srcIs8Bit = is8Bit();
2000 bool replacementIs8Bit = replacement.is8Bit();
2001
2002 // There are 4 cases:
2003 // 1. This and replacement are both 8 bit.
2004 // 2. This and replacement are both 16 bit.
2005 // 3. This is 8 bit and replacement is 16 bit.
2006 // 4. This is 16 bit and replacement is 8 bit.
2007 if (srcIs8Bit && replacementIs8Bit) {
2008 // Case 1
2009 LChar* data;
2010 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
2011 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
2012 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
2013 memcpy(data + dstOffset, characters8() + srcSegmentStart,
2014 srcSegmentLength * sizeof(LChar));
2015 dstOffset += srcSegmentLength;
2016 memcpy(data + dstOffset, replacement.characters8(),
2017 repStrLength * sizeof(LChar));
2018 dstOffset += repStrLength;
2019 srcSegmentStart = srcSegmentEnd + patternLength;
2020 }
2021
2022 srcSegmentLength = m_length - srcSegmentStart;
2023 memcpy(data + dstOffset, characters8() + srcSegmentStart,
2024 srcSegmentLength * sizeof(LChar));
2025
2026 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());
2027
2028 return newImpl.release();
2029 }
2030
2031 UChar* data;
2032 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
2033 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
2034 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
2035 if (srcIs8Bit) {
2036 // Case 3.
2037 for (unsigned i = 0; i < srcSegmentLength; ++i)
2038 data[i + dstOffset] = characters8()[i + srcSegmentStart];
2039 } else {
2040 // Case 2 & 4.
2041 memcpy(data + dstOffset, characters16() + srcSegmentStart,
2042 srcSegmentLength * sizeof(UChar));
2043 }
2044 dstOffset += srcSegmentLength;
2045 if (replacementIs8Bit) {
2046 // Cases 2 & 3.
2047 for (unsigned i = 0; i < repStrLength; ++i)
2048 data[i + dstOffset] = replacement.characters8()[i];
2049 } else {
2050 // Case 4
2051 memcpy(data + dstOffset, replacement.characters16(),
2052 repStrLength * sizeof(UChar));
2053 }
2054 dstOffset += repStrLength;
2055 srcSegmentStart = srcSegmentEnd + patternLength;
2056 }
2057
2058 srcSegmentLength = m_length - srcSegmentStart;
2059 if (srcIs8Bit) {
2060 // Case 3.
2061 for (unsigned i = 0; i < srcSegmentLength; ++i)
2062 data[i + dstOffset] = characters8()[i + srcSegmentStart];
2063 } else {
2064 // Cases 2 & 4.
2065 memcpy(data + dstOffset, characters16() + srcSegmentStart,
2066 srcSegmentLength * sizeof(UChar));
2067 }
2068
2069 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());
2070
2071 return newImpl.release();
2072 }
2073
2074 PassRefPtr<StringImpl> StringImpl::upconvertedString() {
2075 if (is8Bit())
2076 return String::make16BitFrom8BitSource(characters8(), m_length)
2077 .releaseImpl();
2078 return this;
2079 }
2080
2081 static inline bool stringImplContentEqual(const StringImpl* a,
2082 const StringImpl* b) {
2083 unsigned aLength = a->length();
2084 unsigned bLength = b->length();
2085 if (aLength != bLength)
2086 return false;
2087
2088 if (a->is8Bit()) {
2089 if (b->is8Bit())
2090 return equal(a->characters8(), b->characters8(), aLength);
2091
2092 return equal(a->characters8(), b->characters16(), aLength);
2093 }
2094
2095 if (b->is8Bit())
2096 return equal(a->characters16(), b->characters8(), aLength);
2097
2098 return equal(a->characters16(), b->characters16(), aLength);
2099 }
2100
2101 bool equal(const StringImpl* a, const StringImpl* b) {
2102 if (a == b)
2103 return true;
2104 if (!a || !b)
2105 return false;
2106 if (a->isAtomic() && b->isAtomic())
2107 return false;
2108
2109 return stringImplContentEqual(a, b);
2110 }
2111
2112 template <typename CharType>
2113 inline bool equalInternal(const StringImpl* a,
2114 const CharType* b,
2115 unsigned length) {
2116 if (!a)
2117 return !b;
2118 if (!b)
2119 return false;
2120
2121 if (a->length() != length)
2122 return false;
2123 if (a->is8Bit())
2124 return equal(a->characters8(), b, length);
2125 return equal(a->characters16(), b, length);
2126 }
2127
2128 bool equal(const StringImpl* a, const LChar* b, unsigned length) {
2129 return equalInternal(a, b, length);
2130 }
2131
2132 bool equal(const StringImpl* a, const UChar* b, unsigned length) {
2133 return equalInternal(a, b, length);
2134 }
2135
2136 bool equal(const StringImpl* a, const LChar* b) {
2137 if (!a)
2138 return !b;
2139 if (!b)
2140 return !a;
2141
2142 unsigned length = a->length();
2143
2144 if (a->is8Bit()) {
2145 const LChar* aPtr = a->characters8();
2146 for (unsigned i = 0; i != length; ++i) {
2147 LChar bc = b[i];
2148 LChar ac = aPtr[i];
2149 if (!bc)
2150 return false;
2151 if (ac != bc)
2152 return false;
2153 }
2154
2155 return !b[length];
2156 }
2157
2158 const UChar* aPtr = a->characters16();
2159 for (unsigned i = 0; i != length; ++i) {
2160 LChar bc = b[i];
2161 if (!bc)
2162 return false;
2163 if (aPtr[i] != bc)
2164 return false;
2165 }
2166
2167 return !b[length];
2168 }
2169
2170 bool equalNonNull(const StringImpl* a, const StringImpl* b) {
2171 DCHECK(a);
2172 DCHECK(b);
2173 if (a == b)
2174 return true;
2175
2176 return stringImplContentEqual(a, b);
2177 }
2178
2179 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) {
2180 if (!a && b && !b->length())
2181 return true;
2182 if (!b && a && !a->length())
2183 return true;
2184 return equal(a, b);
2185 }
2186
// Orders two character sequences of lengths |l1| and |l2| after passing each
// character through toASCIILower(). Returns 1 or -1 at the first position
// where the lowered characters differ; if one sequence is a prefix of the
// other, the longer one orders last; returns 0 when both are equal.
template <typename CharacterType1, typename CharacterType2>
int codePointCompareIgnoringASCIICase(unsigned l1,
                                      unsigned l2,
                                      const CharacterType1* c1,
                                      const CharacterType2* c2) {
  const unsigned commonLength = l1 < l2 ? l1 : l2;

  for (unsigned i = 0; i < commonLength; ++i) {
    const auto lowered1 = toASCIILower(c1[i]);
    const auto lowered2 = toASCIILower(c2[i]);
    if (lowered1 != lowered2)
      return (lowered1 > lowered2) ? 1 : -1;
  }

  // The shared prefix matched, so the lengths decide.
  if (l1 == l2)
    return 0;

  return (l1 > l2) ? 1 : -1;
}
2208
2209 int codePointCompareIgnoringASCIICase(const StringImpl* string1,
2210 const LChar* string2) {
2211 unsigned length1 = string1 ? string1->length() : 0;
2212 size_t length2 = string2 ? strlen(reinterpret_cast<const char*>(string2)) : 0;
2213
2214 if (!string1)
2215 return length2 > 0 ? -1 : 0;
2216
2217 if (!string2)
2218 return length1 > 0 ? 1 : 0;
2219
2220 if (string1->is8Bit())
2221 return codePointCompareIgnoringASCIICase(length1, length2,
2222 string1->characters8(), string2);
2223 return codePointCompareIgnoringASCIICase(length1, length2,
2224 string1->characters16(), string2);
2225 }
2226
2227 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) {
2228 if (!localeIdentifier.isNull()) {
2229 if (localeIdMatchesLang(localeIdentifier, "tr") ||
2230 localeIdMatchesLang(localeIdentifier, "az")) {
2231 if (c == 'i')
2232 return latinCapitalLetterIWithDotAbove;
2233 if (c == latinSmallLetterDotlessI)
2234 return 'I';
2235 } else if (localeIdMatchesLang(localeIdentifier, "lt")) {
2236 // TODO(rob.buis) implement upper-casing rules for lt
2237 // like in StringImpl::upper(locale).
2238 }
2239 }
2240
2241 return toUpper(c);
2242 }
2243
2244 } // namespace WTF
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/text/StringImpl.h ('k') | third_party/WebKit/Source/wtf/text/StringImplCF.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698