| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | |
| 3 * (C) 1999 Antti Koivisto (koivisto@kde.org) | |
| 4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) | |
| 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All | |
| 6 * rights reserved. | |
| 7 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) | |
| 8 * | |
| 9 * This library is free software; you can redistribute it and/or | |
| 10 * modify it under the terms of the GNU Library General Public | |
| 11 * License as published by the Free Software Foundation; either | |
| 12 * version 2 of the License, or (at your option) any later version. | |
| 13 * | |
| 14 * This library is distributed in the hope that it will be useful, | |
| 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 17 * Library General Public License for more details. | |
| 18 * | |
| 19 * You should have received a copy of the GNU Library General Public License | |
| 20 * along with this library; see the file COPYING.LIB. If not, write to | |
| 21 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
| 22 * Boston, MA 02110-1301, USA. | |
| 23 * | |
| 24 */ | |
| 25 | |
| 26 #include "wtf/text/StringImpl.h" | |
| 27 | |
| 28 #include "wtf/DynamicAnnotations.h" | |
| 29 #include "wtf/LeakAnnotations.h" | |
| 30 #include "wtf/PtrUtil.h" | |
| 31 #include "wtf/StaticConstructors.h" | |
| 32 #include "wtf/StdLibExtras.h" | |
| 33 #include "wtf/allocator/Partitions.h" | |
| 34 #include "wtf/text/AtomicString.h" | |
| 35 #include "wtf/text/AtomicStringTable.h" | |
| 36 #include "wtf/text/CString.h" | |
| 37 #include "wtf/text/CharacterNames.h" | |
| 38 #include "wtf/text/StringBuffer.h" | |
| 39 #include "wtf/text/StringHash.h" | |
| 40 #include "wtf/text/StringToNumber.h" | |
| 41 #include <algorithm> | |
| 42 #include <memory> | |
| 43 | |
| 44 #ifdef STRING_STATS | |
| 45 #include "wtf/DataLog.h" | |
| 46 #include "wtf/HashMap.h" | |
| 47 #include "wtf/HashSet.h" | |
| 48 #include "wtf/RefCounted.h" | |
| 49 #include "wtf/ThreadingPrimitives.h" | |
| 50 #include <unistd.h> | |
| 51 #endif | |
| 52 | |
| 53 using namespace std; | |
| 54 | |
| 55 namespace WTF { | |
| 56 | |
| 57 using namespace Unicode; | |
| 58 | |
| 59 // As of Jan 2017, StringImpl needs 2 * sizeof(int) + 29 bits of data, and | |
| 60 // sizeof(ThreadRestrictionVerifier) is 16 bytes. Thus, in DCHECK mode the | |
| 61 // class may be padded to 32 bytes. | |
| 62 #if DCHECK_IS_ON() | |
| 63 static_assert(sizeof(StringImpl) <= 8 * sizeof(int), | |
| 64 "StringImpl should stay small"); | |
| 65 #else | |
| 66 static_assert(sizeof(StringImpl) <= 3 * sizeof(int), | |
| 67 "StringImpl should stay small"); | |
| 68 #endif | |
| 69 | |
| 70 #ifdef STRING_STATS | |
| 71 | |
| 72 static Mutex& statsMutex() { | |
| 73 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); | |
| 74 return mutex; | |
| 75 } | |
| 76 | |
| 77 static HashSet<void*>& liveStrings() { | |
| 78 // Notice that we can't use HashSet<StringImpl*> because then HashSet would | |
| 79 // dedup identical strings. | |
| 80 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ()); | |
| 81 return strings; | |
| 82 } | |
| 83 | |
| 84 void addStringForStats(StringImpl* string) { | |
| 85 MutexLocker locker(statsMutex()); | |
| 86 liveStrings().add(string); | |
| 87 } | |
| 88 | |
| 89 void removeStringForStats(StringImpl* string) { | |
| 90 MutexLocker locker(statsMutex()); | |
| 91 liveStrings().remove(string); | |
| 92 } | |
| 93 | |
| 94 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) { | |
| 95 const unsigned kMaxSnippetLength = 64; | |
| 96 snippet.clear(); | |
| 97 | |
| 98 size_t expectedLength = std::min(string->length(), kMaxSnippetLength); | |
| 99 if (expectedLength == kMaxSnippetLength) | |
| 100 expectedLength += 3; // For the "...". | |
| 101 ++expectedLength; // For the terminating '\0'. | |
| 102 snippet.reserveCapacity(expectedLength); | |
| 103 | |
| 104 size_t i; | |
| 105 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) { | |
| 106 UChar c = (*string)[i]; | |
| 107 if (isASCIIPrintable(c)) | |
| 108 snippet.append(c); | |
| 109 else | |
| 110 snippet.append('?'); | |
| 111 } | |
| 112 if (i < string->length()) { | |
| 113 snippet.append('.'); | |
| 114 snippet.append('.'); | |
| 115 snippet.append('.'); | |
| 116 } | |
| 117 snippet.append('\0'); | |
| 118 } | |
| 119 | |
| 120 static bool isUnnecessarilyWide(const StringImpl* string) { | |
| 121 if (string->is8Bit()) | |
| 122 return false; | |
| 123 UChar c = 0; | |
| 124 for (unsigned i = 0; i < string->length(); ++i) | |
| 125 c |= (*string)[i] >> 8; | |
| 126 return !c; | |
| 127 } | |
| 128 | |
| 129 class PerStringStats : public RefCounted<PerStringStats> { | |
| 130 public: | |
| 131 static PassRefPtr<PerStringStats> create() { | |
| 132 return adoptRef(new PerStringStats); | |
| 133 } | |
| 134 | |
| 135 void add(const StringImpl* string) { | |
| 136 ++m_numberOfCopies; | |
| 137 if (!m_length) { | |
| 138 m_length = string->length(); | |
| 139 fillWithSnippet(string, m_snippet); | |
| 140 } | |
| 141 if (string->isAtomic()) | |
| 142 ++m_numberOfAtomicCopies; | |
| 143 if (isUnnecessarilyWide(string)) | |
| 144 m_unnecessarilyWide = true; | |
| 145 } | |
| 146 | |
| 147 size_t totalCharacters() const { return m_numberOfCopies * m_length; } | |
| 148 | |
| 149 void print() { | |
| 150 const char* status = "ok"; | |
| 151 if (m_unnecessarilyWide) | |
| 152 status = "16"; | |
| 153 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, | |
| 154 m_length, m_snippet.data()); | |
| 155 } | |
| 156 | |
| 157 bool m_unnecessarilyWide; | |
| 158 unsigned m_numberOfCopies; | |
| 159 unsigned m_length; | |
| 160 unsigned m_numberOfAtomicCopies; | |
| 161 Vector<char> m_snippet; | |
| 162 | |
| 163 private: | |
| 164 PerStringStats() | |
| 165 : m_unnecessarilyWide(false), | |
| 166 m_numberOfCopies(0), | |
| 167 m_length(0), | |
| 168 m_numberOfAtomicCopies(0) {} | |
| 169 }; | |
| 170 | |
| 171 bool operator<(const RefPtr<PerStringStats>& a, | |
| 172 const RefPtr<PerStringStats>& b) { | |
| 173 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide) | |
| 174 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide; | |
| 175 if (a->totalCharacters() != b->totalCharacters()) | |
| 176 return a->totalCharacters() < b->totalCharacters(); | |
| 177 if (a->m_numberOfCopies != b->m_numberOfCopies) | |
| 178 return a->m_numberOfCopies < b->m_numberOfCopies; | |
| 179 if (a->m_length != b->m_length) | |
| 180 return a->m_length < b->m_length; | |
| 181 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies; | |
| 182 } | |
| 183 | |
| 184 static void printLiveStringStats(void*) { | |
| 185 MutexLocker locker(statsMutex()); | |
| 186 HashSet<void*>& strings = liveStrings(); | |
| 187 | |
| 188 HashMap<StringImpl*, RefPtr<PerStringStats>> stats; | |
| 189 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); | |
| 190 ++iter) { | |
| 191 StringImpl* string = static_cast<StringImpl*>(*iter); | |
| 192 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry = | |
| 193 stats.find(string); | |
| 194 RefPtr<PerStringStats> value = | |
| 195 entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) | |
| 196 : entry->value; | |
| 197 value->add(string); | |
| 198 stats.set(string, value.release()); | |
| 199 } | |
| 200 | |
| 201 Vector<RefPtr<PerStringStats>> all; | |
| 202 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter = | |
| 203 stats.begin(); | |
| 204 iter != stats.end(); ++iter) | |
| 205 all.append(iter->value); | |
| 206 | |
| 207 std::sort(all.begin(), all.end()); | |
| 208 std::reverse(all.begin(), all.end()); | |
| 209 for (size_t i = 0; i < 20 && i < all.size(); ++i) | |
| 210 all[i]->print(); | |
| 211 } | |
| 212 | |
| 213 StringStats StringImpl::m_stringStats; | |
| 214 | |
| 215 unsigned StringStats::s_stringRemovesTillPrintStats = | |
| 216 StringStats::s_printStringStatsFrequency; | |
| 217 | |
| 218 void StringStats::removeString(StringImpl* string) { | |
| 219 unsigned length = string->length(); | |
| 220 --m_totalNumberStrings; | |
| 221 | |
| 222 if (string->is8Bit()) { | |
| 223 --m_number8BitStrings; | |
| 224 m_total8BitData -= length; | |
| 225 } else { | |
| 226 --m_number16BitStrings; | |
| 227 m_total16BitData -= length; | |
| 228 } | |
| 229 | |
| 230 if (!--s_stringRemovesTillPrintStats) { | |
| 231 s_stringRemovesTillPrintStats = s_printStringStatsFrequency; | |
| 232 printStats(); | |
| 233 } | |
| 234 } | |
| 235 | |
| 236 void StringStats::printStats() { | |
| 237 dataLogF("String stats for process id %d:\n", getpid()); | |
| 238 | |
| 239 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData; | |
| 240 double percent8Bit = | |
| 241 m_totalNumberStrings | |
| 242 ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings | |
| 243 : 0.0; | |
| 244 double average8bitLength = | |
| 245 m_number8BitStrings | |
| 246 ? (double)m_total8BitData / (double)m_number8BitStrings | |
| 247 : 0.0; | |
| 248 dataLogF( | |
| 249 "%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length " | |
| 250 "%6.1f\n", | |
| 251 m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, | |
| 252 average8bitLength); | |
| 253 | |
| 254 double percent16Bit = | |
| 255 m_totalNumberStrings | |
| 256 ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings | |
| 257 : 0.0; | |
| 258 double average16bitLength = | |
| 259 m_number16BitStrings | |
| 260 ? (double)m_total16BitData / (double)m_number16BitStrings | |
| 261 : 0.0; | |
| 262 dataLogF( | |
| 263 "%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length " | |
| 264 "%6.1f\n", | |
| 265 m_number16BitStrings, percent16Bit, m_total16BitData, | |
| 266 m_total16BitData * 2, average16bitLength); | |
| 267 | |
| 268 double averageLength = | |
| 269 m_totalNumberStrings | |
| 270 ? (double)totalNumberCharacters / (double)m_totalNumberStrings | |
| 271 : 0.0; | |
| 272 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2; | |
| 273 dataLogF( | |
| 274 "%8u Total %12llu chars %12llu bytes avg length " | |
| 275 "%6.1f\n", | |
| 276 m_totalNumberStrings, totalNumberCharacters, totalDataBytes, | |
| 277 averageLength); | |
| 278 unsigned long long totalSavedBytes = m_total8BitData; | |
| 279 double percentSavings = totalSavedBytes | |
| 280 ? ((double)totalSavedBytes * 100) / | |
| 281 (double)(totalDataBytes + totalSavedBytes) | |
| 282 : 0.0; | |
| 283 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, | |
| 284 percentSavings); | |
| 285 | |
| 286 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl); | |
| 287 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100; | |
| 288 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, | |
| 289 overheadPercent); | |
| 290 | |
| 291 internal::callOnMainThread(&printLiveStringStats, nullptr); | |
| 292 } | |
| 293 #endif | |
| 294 | |
| 295 void* StringImpl::operator new(size_t size) { | |
| 296 DCHECK_EQ(size, sizeof(StringImpl)); | |
| 297 return Partitions::bufferMalloc(size, "WTF::StringImpl"); | |
| 298 } | |
| 299 | |
| 300 void StringImpl::operator delete(void* ptr) { | |
| 301 Partitions::bufferFree(ptr); | |
| 302 } | |
| 303 | |
| 304 inline StringImpl::~StringImpl() { | |
| 305 DCHECK(!isStatic()); | |
| 306 | |
| 307 STRING_STATS_REMOVE_STRING(this); | |
| 308 | |
| 309 if (isAtomic()) | |
| 310 AtomicStringTable::instance().remove(this); | |
| 311 } | |
| 312 | |
| 313 void StringImpl::destroyIfNotStatic() const { | |
| 314 if (!isStatic()) | |
| 315 delete this; | |
| 316 } | |
| 317 | |
| 318 void StringImpl::updateContainsOnlyASCII() const { | |
| 319 m_containsOnlyASCII = is8Bit() | |
| 320 ? charactersAreAllASCII(characters8(), length()) | |
| 321 : charactersAreAllASCII(characters16(), length()); | |
| 322 m_needsASCIICheck = false; | |
| 323 } | |
| 324 | |
| 325 bool StringImpl::isSafeToSendToAnotherThread() const { | |
| 326 if (isStatic()) | |
| 327 return true; | |
| 328 // AtomicStrings are not safe to send between threads as ~StringImpl() | |
| 329 // will try to remove them from the wrong AtomicStringTable. | |
| 330 if (isAtomic()) | |
| 331 return false; | |
| 332 if (hasOneRef()) | |
| 333 return true; | |
| 334 return false; | |
| 335 } | |
| 336 | |
| 337 #if DCHECK_IS_ON() | |
| 338 std::string StringImpl::asciiForDebugging() const { | |
| 339 CString ascii = String(isolatedCopy()->substring(0, 128)).ascii(); | |
| 340 return std::string(ascii.data(), ascii.length()); | |
| 341 } | |
| 342 #endif | |
| 343 | |
| 344 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, | |
| 345 LChar*& data) { | |
| 346 if (!length) { | |
| 347 data = 0; | |
| 348 return empty; | |
| 349 } | |
| 350 | |
| 351 // Allocate a single buffer large enough to contain the StringImpl | |
| 352 // struct as well as the data which it contains. This removes one | |
| 353 // heap allocation from this call. | |
| 354 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc( | |
| 355 allocationSize<LChar>(length), "WTF::StringImpl")); | |
| 356 | |
| 357 data = reinterpret_cast<LChar*>(string + 1); | |
| 358 return adoptRef(new (string) StringImpl(length, Force8BitConstructor)); | |
| 359 } | |
| 360 | |
| 361 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, | |
| 362 UChar*& data) { | |
| 363 if (!length) { | |
| 364 data = 0; | |
| 365 return empty; | |
| 366 } | |
| 367 | |
| 368 // Allocate a single buffer large enough to contain the StringImpl | |
| 369 // struct as well as the data which it contains. This removes one | |
| 370 // heap allocation from this call. | |
| 371 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc( | |
| 372 allocationSize<UChar>(length), "WTF::StringImpl")); | |
| 373 | |
| 374 data = reinterpret_cast<UChar*>(string + 1); | |
| 375 return adoptRef(new (string) StringImpl(length)); | |
| 376 } | |
| 377 | |
| 378 static StaticStringsTable& staticStrings() { | |
| 379 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ()); | |
| 380 return staticStrings; | |
| 381 } | |
| 382 | |
| 383 #if DCHECK_IS_ON() | |
| 384 static bool s_allowCreationOfStaticStrings = true; | |
| 385 #endif | |
| 386 | |
| 387 const StaticStringsTable& StringImpl::allStaticStrings() { | |
| 388 return staticStrings(); | |
| 389 } | |
| 390 | |
| 391 void StringImpl::freezeStaticStrings() { | |
| 392 DCHECK(isMainThread()); | |
| 393 | |
| 394 #if DCHECK_IS_ON() | |
| 395 s_allowCreationOfStaticStrings = false; | |
| 396 #endif | |
| 397 } | |
| 398 | |
| 399 unsigned StringImpl::m_highestStaticStringLength = 0; | |
| 400 | |
| 401 DEFINE_GLOBAL(StringImpl, globalEmpty); | |
| 402 DEFINE_GLOBAL(StringImpl, globalEmpty16Bit); | |
| 403 // Callers need the global empty strings to be non-const. | |
| 404 StringImpl* StringImpl::empty = const_cast<StringImpl*>(&globalEmpty); | |
| 405 StringImpl* StringImpl::empty16Bit = const_cast<StringImpl*>(&globalEmpty16Bit); | |
| 406 void StringImpl::initStatics() { | |
| 407 new ((void*)empty) StringImpl(ConstructEmptyString); | |
| 408 new ((void*)empty16Bit) StringImpl(ConstructEmptyString16Bit); | |
| 409 WTF_ANNOTATE_BENIGN_RACE(StringImpl::empty, | |
| 410 "Benign race on the reference counter of a static " | |
| 411 "string created by StringImpl::empty"); | |
| 412 WTF_ANNOTATE_BENIGN_RACE(StringImpl::empty16Bit, | |
| 413 "Benign race on the reference counter of a static " | |
| 414 "string created by StringImpl::empty16Bit"); | |
| 415 } | |
| 416 | |
| 417 StringImpl* StringImpl::createStatic(const char* string, | |
| 418 unsigned length, | |
| 419 unsigned hash) { | |
| 420 #if DCHECK_IS_ON() | |
| 421 DCHECK(s_allowCreationOfStaticStrings); | |
| 422 #endif | |
| 423 DCHECK(string); | |
| 424 DCHECK(length); | |
| 425 | |
| 426 StaticStringsTable::const_iterator it = staticStrings().find(hash); | |
| 427 if (it != staticStrings().end()) { | |
| 428 DCHECK(!memcmp(string, it->value + 1, length * sizeof(LChar))); | |
| 429 return it->value; | |
| 430 } | |
| 431 | |
| 432 // Allocate a single buffer large enough to contain the StringImpl | |
| 433 // struct as well as the data which it contains. This removes one | |
| 434 // heap allocation from this call. | |
| 435 RELEASE_ASSERT(length <= | |
| 436 ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / | |
| 437 sizeof(LChar))); | |
| 438 size_t size = sizeof(StringImpl) + length * sizeof(LChar); | |
| 439 | |
| 440 WTF_INTERNAL_LEAK_SANITIZER_DISABLED_SCOPE; | |
| 441 StringImpl* impl = static_cast<StringImpl*>( | |
| 442 Partitions::bufferMalloc(size, "WTF::StringImpl")); | |
| 443 | |
| 444 LChar* data = reinterpret_cast<LChar*>(impl + 1); | |
| 445 impl = new (impl) StringImpl(length, hash, StaticString); | |
| 446 memcpy(data, string, length * sizeof(LChar)); | |
| 447 #if DCHECK_IS_ON() | |
| 448 impl->assertHashIsCorrect(); | |
| 449 #endif | |
| 450 | |
| 451 DCHECK(isMainThread()); | |
| 452 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length); | |
| 453 staticStrings().insert(hash, impl); | |
| 454 WTF_ANNOTATE_BENIGN_RACE(impl, | |
| 455 "Benign race on the reference counter of a static " | |
| 456 "string created by StringImpl::createStatic"); | |
| 457 | |
| 458 return impl; | |
| 459 } | |
| 460 | |
| 461 void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) { | |
| 462 #if DCHECK_IS_ON() | |
| 463 DCHECK(s_allowCreationOfStaticStrings); | |
| 464 #endif | |
| 465 staticStrings().reserveCapacityForSize(size); | |
| 466 } | |
| 467 | |
| 468 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, | |
| 469 unsigned length) { | |
| 470 if (!characters || !length) | |
| 471 return empty; | |
| 472 | |
| 473 UChar* data; | |
| 474 RefPtr<StringImpl> string = createUninitialized(length, data); | |
| 475 memcpy(data, characters, length * sizeof(UChar)); | |
| 476 return string.release(); | |
| 477 } | |
| 478 | |
| 479 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, | |
| 480 unsigned length) { | |
| 481 if (!characters || !length) | |
| 482 return empty; | |
| 483 | |
| 484 LChar* data; | |
| 485 RefPtr<StringImpl> string = createUninitialized(length, data); | |
| 486 memcpy(data, characters, length * sizeof(LChar)); | |
| 487 return string.release(); | |
| 488 } | |
| 489 | |
| 490 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, | |
| 491 unsigned length) { | |
| 492 if (!characters || !length) | |
| 493 return empty; | |
| 494 | |
| 495 LChar* data; | |
| 496 RefPtr<StringImpl> string = createUninitialized(length, data); | |
| 497 | |
| 498 for (size_t i = 0; i < length; ++i) { | |
| 499 if (characters[i] & 0xff00) | |
| 500 return create(characters, length); | |
| 501 data[i] = static_cast<LChar>(characters[i]); | |
| 502 } | |
| 503 | |
| 504 return string.release(); | |
| 505 } | |
| 506 | |
| 507 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) { | |
| 508 if (!string) | |
| 509 return empty; | |
| 510 size_t length = strlen(reinterpret_cast<const char*>(string)); | |
| 511 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); | |
| 512 return create(string, length); | |
| 513 } | |
| 514 | |
| 515 bool StringImpl::containsOnlyWhitespace() { | |
| 516 // FIXME: The definition of whitespace here includes a number of characters | |
| 517 // that are not whitespace from the point of view of LayoutText; I wonder if | |
| 518 // that's a problem in practice. | |
| 519 if (is8Bit()) { | |
| 520 for (unsigned i = 0; i < m_length; ++i) { | |
| 521 UChar c = characters8()[i]; | |
| 522 if (!isASCIISpace(c)) | |
| 523 return false; | |
| 524 } | |
| 525 | |
| 526 return true; | |
| 527 } | |
| 528 | |
| 529 for (unsigned i = 0; i < m_length; ++i) { | |
| 530 UChar c = characters16()[i]; | |
| 531 if (!isASCIISpace(c)) | |
| 532 return false; | |
| 533 } | |
| 534 return true; | |
| 535 } | |
| 536 | |
| 537 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, | |
| 538 unsigned length) const { | |
| 539 if (start >= m_length) | |
| 540 return empty; | |
| 541 unsigned maxLength = m_length - start; | |
| 542 if (length >= maxLength) { | |
| 543 // PassRefPtr has trouble dealing with const arguments. It should be updated | |
| 544 // so this const_cast is not necessary. | |
| 545 if (!start) | |
| 546 return const_cast<StringImpl*>(this); | |
| 547 length = maxLength; | |
| 548 } | |
| 549 if (is8Bit()) | |
| 550 return create(characters8() + start, length); | |
| 551 | |
| 552 return create(characters16() + start, length); | |
| 553 } | |
| 554 | |
| 555 UChar32 StringImpl::characterStartingAt(unsigned i) { | |
| 556 if (is8Bit()) | |
| 557 return characters8()[i]; | |
| 558 if (U16_IS_SINGLE(characters16()[i])) | |
| 559 return characters16()[i]; | |
| 560 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && | |
| 561 U16_IS_TRAIL(characters16()[i + 1])) | |
| 562 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]); | |
| 563 return 0; | |
| 564 } | |
| 565 | |
| 566 unsigned StringImpl::copyTo(UChar* buffer, | |
| 567 unsigned start, | |
| 568 unsigned maxLength) const { | |
| 569 unsigned numberOfCharactersToCopy = std::min(length() - start, maxLength); | |
| 570 if (!numberOfCharactersToCopy) | |
| 571 return 0; | |
| 572 if (is8Bit()) | |
| 573 copyChars(buffer, characters8() + start, numberOfCharactersToCopy); | |
| 574 else | |
| 575 copyChars(buffer, characters16() + start, numberOfCharactersToCopy); | |
| 576 return numberOfCharactersToCopy; | |
| 577 } | |
| 578 | |
| 579 PassRefPtr<StringImpl> StringImpl::lowerASCII() { | |
| 580 // First scan the string for uppercase and non-ASCII characters: | |
| 581 if (is8Bit()) { | |
| 582 unsigned firstIndexToBeLowered = m_length; | |
| 583 for (unsigned i = 0; i < m_length; ++i) { | |
| 584 LChar ch = characters8()[i]; | |
| 585 if (isASCIIUpper(ch)) { | |
| 586 firstIndexToBeLowered = i; | |
| 587 break; | |
| 588 } | |
| 589 } | |
| 590 | |
| 591 // Nothing to do if the string is all ASCII with no uppercase. | |
| 592 if (firstIndexToBeLowered == m_length) { | |
| 593 return this; | |
| 594 } | |
| 595 | |
| 596 LChar* data8; | |
| 597 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | |
| 598 memcpy(data8, characters8(), firstIndexToBeLowered); | |
| 599 | |
| 600 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) { | |
| 601 LChar ch = characters8()[i]; | |
| 602 data8[i] = isASCIIUpper(ch) ? toASCIILower(ch) : ch; | |
| 603 } | |
| 604 return newImpl.release(); | |
| 605 } | |
| 606 bool noUpper = true; | |
| 607 UChar ored = 0; | |
| 608 | |
| 609 const UChar* end = characters16() + m_length; | |
| 610 for (const UChar* chp = characters16(); chp != end; ++chp) { | |
| 611 if (isASCIIUpper(*chp)) | |
| 612 noUpper = false; | |
| 613 ored |= *chp; | |
| 614 } | |
| 615 // Nothing to do if the string is all ASCII with no uppercase. | |
| 616 if (noUpper && !(ored & ~0x7F)) | |
| 617 return this; | |
| 618 | |
| 619 RELEASE_ASSERT(m_length <= | |
| 620 static_cast<unsigned>(numeric_limits<unsigned>::max())); | |
| 621 unsigned length = m_length; | |
| 622 | |
| 623 UChar* data16; | |
| 624 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
| 625 | |
| 626 for (unsigned i = 0; i < length; ++i) { | |
| 627 UChar c = characters16()[i]; | |
| 628 data16[i] = isASCIIUpper(c) ? toASCIILower(c) : c; | |
| 629 } | |
| 630 return newImpl.release(); | |
| 631 } | |
| 632 | |
| 633 PassRefPtr<StringImpl> StringImpl::lower() { | |
| 634 // Note: This is a hot function in the Dromaeo benchmark, specifically the | |
| 635 // no-op code path up through the first 'return' statement. | |
| 636 | |
| 637 // First scan the string for uppercase and non-ASCII characters: | |
| 638 if (is8Bit()) { | |
| 639 unsigned firstIndexToBeLowered = m_length; | |
| 640 for (unsigned i = 0; i < m_length; ++i) { | |
| 641 LChar ch = characters8()[i]; | |
| 642 if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) { | |
| 643 firstIndexToBeLowered = i; | |
| 644 break; | |
| 645 } | |
| 646 } | |
| 647 | |
| 648 // Nothing to do if the string is all ASCII with no uppercase. | |
| 649 if (firstIndexToBeLowered == m_length) | |
| 650 return this; | |
| 651 | |
| 652 LChar* data8; | |
| 653 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | |
| 654 memcpy(data8, characters8(), firstIndexToBeLowered); | |
| 655 | |
| 656 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) { | |
| 657 LChar ch = characters8()[i]; | |
| 658 data8[i] = UNLIKELY(ch & ~0x7F) ? static_cast<LChar>(Unicode::toLower(ch)) | |
| 659 : toASCIILower(ch); | |
| 660 } | |
| 661 | |
| 662 return newImpl.release(); | |
| 663 } | |
| 664 | |
| 665 bool noUpper = true; | |
| 666 UChar ored = 0; | |
| 667 | |
| 668 const UChar* end = characters16() + m_length; | |
| 669 for (const UChar* chp = characters16(); chp != end; ++chp) { | |
| 670 if (UNLIKELY(isASCIIUpper(*chp))) | |
| 671 noUpper = false; | |
| 672 ored |= *chp; | |
| 673 } | |
| 674 // Nothing to do if the string is all ASCII with no uppercase. | |
| 675 if (noUpper && !(ored & ~0x7F)) | |
| 676 return this; | |
| 677 | |
| 678 RELEASE_ASSERT(m_length <= | |
| 679 static_cast<unsigned>(numeric_limits<int32_t>::max())); | |
| 680 int32_t length = m_length; | |
| 681 | |
| 682 if (!(ored & ~0x7F)) { | |
| 683 UChar* data16; | |
| 684 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
| 685 | |
| 686 for (int32_t i = 0; i < length; ++i) { | |
| 687 UChar c = characters16()[i]; | |
| 688 data16[i] = toASCIILower(c); | |
| 689 } | |
| 690 return newImpl.release(); | |
| 691 } | |
| 692 | |
| 693 // Do a slower implementation for cases that include non-ASCII characters. | |
| 694 UChar* data16; | |
| 695 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
| 696 | |
| 697 bool error; | |
| 698 int32_t realLength = | |
| 699 Unicode::toLower(data16, length, characters16(), m_length, &error); | |
| 700 if (!error && realLength == length) | |
| 701 return newImpl.release(); | |
| 702 | |
| 703 newImpl = createUninitialized(realLength, data16); | |
| 704 Unicode::toLower(data16, realLength, characters16(), m_length, &error); | |
| 705 if (error) | |
| 706 return this; | |
| 707 return newImpl.release(); | |
| 708 } | |
| 709 | |
| 710 PassRefPtr<StringImpl> StringImpl::upper() { | |
| 711 // This function could be optimized for no-op cases the way lower() is, | |
| 712 // but in empirical testing, few actual calls to upper() are no-ops, so | |
| 713 // it wouldn't be worth the extra time for pre-scanning. | |
| 714 | |
| 715 RELEASE_ASSERT(m_length <= | |
| 716 static_cast<unsigned>(numeric_limits<int32_t>::max())); | |
| 717 int32_t length = m_length; | |
| 718 | |
| 719 if (is8Bit()) { | |
| 720 LChar* data8; | |
| 721 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | |
| 722 | |
| 723 // Do a faster loop for the case where all the characters are ASCII. | |
| 724 LChar ored = 0; | |
| 725 for (int i = 0; i < length; ++i) { | |
| 726 LChar c = characters8()[i]; | |
| 727 ored |= c; | |
| 728 data8[i] = toASCIIUpper(c); | |
| 729 } | |
| 730 if (!(ored & ~0x7F)) | |
| 731 return newImpl.release(); | |
| 732 | |
| 733 // Do a slower implementation for cases that include non-ASCII Latin-1 | |
| 734 // characters. | |
| 735 int numberSharpSCharacters = 0; | |
| 736 | |
| 737 // There are two special cases. | |
| 738 // 1. latin-1 characters when converted to upper case are 16 bit | |
| 739 // characters. | |
| 740 // 2. Lower case sharp-S converts to "SS" (two characters) | |
| 741 for (int32_t i = 0; i < length; ++i) { | |
| 742 LChar c = characters8()[i]; | |
| 743 if (UNLIKELY(c == smallLetterSharpSCharacter)) | |
| 744 ++numberSharpSCharacters; | |
| 745 UChar upper = static_cast<UChar>(Unicode::toUpper(c)); | |
| 746 if (UNLIKELY(upper > 0xff)) { | |
| 747 // Since this upper-cased character does not fit in an 8-bit string, we | |
| 748 // need to take the 16-bit path. | |
| 749 goto upconvert; | |
| 750 } | |
| 751 data8[i] = static_cast<LChar>(upper); | |
| 752 } | |
| 753 | |
| 754 if (!numberSharpSCharacters) | |
| 755 return newImpl.release(); | |
| 756 | |
| 757 // We have numberSSCharacters sharp-s characters, but none of the other | |
| 758 // special characters. | |
| 759 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); | |
| 760 | |
| 761 LChar* dest = data8; | |
| 762 | |
| 763 for (int32_t i = 0; i < length; ++i) { | |
| 764 LChar c = characters8()[i]; | |
| 765 if (c == smallLetterSharpSCharacter) { | |
| 766 *dest++ = 'S'; | |
| 767 *dest++ = 'S'; | |
| 768 } else { | |
| 769 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); | |
| 770 } | |
| 771 } | |
| 772 | |
| 773 return newImpl.release(); | |
| 774 } | |
| 775 | |
| 776 upconvert: | |
| 777 RefPtr<StringImpl> upconverted = upconvertedString(); | |
| 778 const UChar* source16 = upconverted->characters16(); | |
| 779 | |
| 780 UChar* data16; | |
| 781 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
| 782 | |
| 783 // Do a faster loop for the case where all the characters are ASCII. | |
| 784 UChar ored = 0; | |
| 785 for (int i = 0; i < length; ++i) { | |
| 786 UChar c = source16[i]; | |
| 787 ored |= c; | |
| 788 data16[i] = toASCIIUpper(c); | |
| 789 } | |
| 790 if (!(ored & ~0x7F)) | |
| 791 return newImpl.release(); | |
| 792 | |
| 793 // Do a slower implementation for cases that include non-ASCII characters. | |
| 794 bool error; | |
| 795 int32_t realLength = | |
| 796 Unicode::toUpper(data16, length, source16, m_length, &error); | |
| 797 if (!error && realLength == length) | |
| 798 return newImpl; | |
| 799 newImpl = createUninitialized(realLength, data16); | |
| 800 Unicode::toUpper(data16, realLength, source16, m_length, &error); | |
| 801 if (error) | |
| 802 return this; | |
| 803 return newImpl.release(); | |
| 804 } | |
| 805 | |
| 806 PassRefPtr<StringImpl> StringImpl::upperASCII() { | |
| 807 if (is8Bit()) { | |
| 808 LChar* data8; | |
| 809 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | |
| 810 | |
| 811 for (unsigned i = 0; i < m_length; ++i) { | |
| 812 LChar c = characters8()[i]; | |
| 813 data8[i] = isASCIILower(c) ? toASCIIUpper(c) : c; | |
| 814 } | |
| 815 return newImpl.release(); | |
| 816 } | |
| 817 | |
| 818 UChar* data16; | |
| 819 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
| 820 | |
| 821 for (unsigned i = 0; i < m_length; ++i) { | |
| 822 UChar c = characters16()[i]; | |
| 823 data16[i] = isASCIILower(c) ? toASCIIUpper(c) : c; | |
| 824 } | |
| 825 return newImpl.release(); | |
| 826 } | |
| 827 | |
| 828 static inline bool localeIdMatchesLang(const AtomicString& localeId, | |
| 829 const StringView& lang) { | |
| 830 RELEASE_ASSERT(lang.length() >= 2 && lang.length() <= 3); | |
| 831 if (!localeId.impl() || !localeId.impl()->startsWithIgnoringCase(lang)) | |
| 832 return false; | |
| 833 if (localeId.impl()->length() == lang.length()) | |
| 834 return true; | |
| 835 const UChar maybeDelimiter = (*localeId.impl())[lang.length()]; | |
| 836 return maybeDelimiter == '-' || maybeDelimiter == '_' || | |
| 837 maybeDelimiter == '@'; | |
| 838 } | |
| 839 | |
| 840 typedef int32_t (*icuCaseConverter)(UChar*, | |
| 841 int32_t, | |
| 842 const UChar*, | |
| 843 int32_t, | |
| 844 const char*, | |
| 845 UErrorCode*); | |
| 846 | |
| 847 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, | |
| 848 size_t length, | |
| 849 icuCaseConverter converter, | |
| 850 const char* locale, | |
| 851 StringImpl* originalString) { | |
| 852 UChar* data16; | |
| 853 size_t targetLength = length; | |
| 854 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16); | |
| 855 do { | |
| 856 UErrorCode status = U_ZERO_ERROR; | |
| 857 targetLength = | |
| 858 converter(data16, targetLength, source16, length, locale, &status); | |
| 859 if (U_SUCCESS(status)) { | |
| 860 if (length > 0) | |
| 861 return output->substring(0, targetLength); | |
| 862 return output.release(); | |
| 863 } | |
| 864 if (status != U_BUFFER_OVERFLOW_ERROR) | |
| 865 return originalString; | |
| 866 // Expand the buffer. | |
| 867 output = StringImpl::createUninitialized(targetLength, data16); | |
| 868 } while (true); | |
| 869 } | |
| 870 | |
| 871 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) { | |
| 872 // Use the more optimized code path most of the time. | |
| 873 // Only Turkic (tr and az) languages and Lithuanian requires | |
| 874 // locale-specific lowercasing rules. Even though CLDR has el-Lower, | |
| 875 // it's identical to the locale-agnostic lowercasing. Context-dependent | |
| 876 // handling of Greek capital sigma is built into the common lowercasing | |
| 877 // function in ICU. | |
| 878 const char* localeForConversion = 0; | |
| 879 if (localeIdMatchesLang(localeIdentifier, "tr") || | |
| 880 localeIdMatchesLang(localeIdentifier, "az")) | |
| 881 localeForConversion = "tr"; | |
| 882 else if (localeIdMatchesLang(localeIdentifier, "lt")) | |
| 883 localeForConversion = "lt"; | |
| 884 else | |
| 885 return lower(); | |
| 886 | |
| 887 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) | |
| 888 CRASH(); | |
| 889 int length = m_length; | |
| 890 | |
| 891 RefPtr<StringImpl> upconverted = upconvertedString(); | |
| 892 const UChar* source16 = upconverted->characters16(); | |
| 893 return caseConvert(source16, length, u_strToLower, localeForConversion, this); | |
| 894 } | |
| 895 | |
| 896 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) { | |
| 897 // Use the more-optimized code path most of the time. | |
| 898 // Only Turkic (tr and az) languages, Greek and Lithuanian require | |
| 899 // locale-specific uppercasing rules. | |
| 900 const char* localeForConversion = 0; | |
| 901 if (localeIdMatchesLang(localeIdentifier, "tr") || | |
| 902 localeIdMatchesLang(localeIdentifier, "az")) | |
| 903 localeForConversion = "tr"; | |
| 904 else if (localeIdMatchesLang(localeIdentifier, "el")) | |
| 905 localeForConversion = "el"; | |
| 906 else if (localeIdMatchesLang(localeIdentifier, "lt")) | |
| 907 localeForConversion = "lt"; | |
| 908 else | |
| 909 return upper(); | |
| 910 | |
| 911 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) | |
| 912 CRASH(); | |
| 913 int length = m_length; | |
| 914 | |
| 915 RefPtr<StringImpl> upconverted = upconvertedString(); | |
| 916 const UChar* source16 = upconverted->characters16(); | |
| 917 | |
| 918 return caseConvert(source16, length, u_strToUpper, localeForConversion, this); | |
| 919 } | |
| 920 | |
| 921 PassRefPtr<StringImpl> StringImpl::fill(UChar character) { | |
| 922 if (!(character & ~0x7F)) { | |
| 923 LChar* data; | |
| 924 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 925 for (unsigned i = 0; i < m_length; ++i) | |
| 926 data[i] = static_cast<LChar>(character); | |
| 927 return newImpl.release(); | |
| 928 } | |
| 929 UChar* data; | |
| 930 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 931 for (unsigned i = 0; i < m_length; ++i) | |
| 932 data[i] = character; | |
| 933 return newImpl.release(); | |
| 934 } | |
| 935 | |
| 936 PassRefPtr<StringImpl> StringImpl::foldCase() { | |
| 937 RELEASE_ASSERT(m_length <= | |
| 938 static_cast<unsigned>(numeric_limits<int32_t>::max())); | |
| 939 int32_t length = m_length; | |
| 940 | |
| 941 if (is8Bit()) { | |
| 942 // Do a faster loop for the case where all the characters are ASCII. | |
| 943 LChar* data; | |
| 944 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 945 LChar ored = 0; | |
| 946 | |
| 947 for (int32_t i = 0; i < length; ++i) { | |
| 948 LChar c = characters8()[i]; | |
| 949 data[i] = toASCIILower(c); | |
| 950 ored |= c; | |
| 951 } | |
| 952 | |
| 953 if (!(ored & ~0x7F)) | |
| 954 return newImpl.release(); | |
| 955 | |
| 956 // Do a slower implementation for cases that include non-ASCII Latin-1 | |
| 957 // characters. | |
| 958 for (int32_t i = 0; i < length; ++i) | |
| 959 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); | |
| 960 | |
| 961 return newImpl.release(); | |
| 962 } | |
| 963 | |
| 964 // Do a faster loop for the case where all the characters are ASCII. | |
| 965 UChar* data; | |
| 966 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 967 UChar ored = 0; | |
| 968 for (int32_t i = 0; i < length; ++i) { | |
| 969 UChar c = characters16()[i]; | |
| 970 ored |= c; | |
| 971 data[i] = toASCIILower(c); | |
| 972 } | |
| 973 if (!(ored & ~0x7F)) | |
| 974 return newImpl.release(); | |
| 975 | |
| 976 // Do a slower implementation for cases that include non-ASCII characters. | |
| 977 bool error; | |
| 978 int32_t realLength = | |
| 979 Unicode::foldCase(data, length, characters16(), m_length, &error); | |
| 980 if (!error && realLength == length) | |
| 981 return newImpl.release(); | |
| 982 newImpl = createUninitialized(realLength, data); | |
| 983 Unicode::foldCase(data, realLength, characters16(), m_length, &error); | |
| 984 if (error) | |
| 985 return this; | |
| 986 return newImpl.release(); | |
| 987 } | |
| 988 | |
| 989 PassRefPtr<StringImpl> StringImpl::truncate(unsigned length) { | |
| 990 if (length >= m_length) | |
| 991 return this; | |
| 992 if (is8Bit()) | |
| 993 return create(characters8(), length); | |
| 994 return create(characters16(), length); | |
| 995 } | |
| 996 | |
| 997 template <class UCharPredicate> | |
| 998 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters( | |
| 999 UCharPredicate predicate) { | |
| 1000 if (!m_length) | |
| 1001 return empty; | |
| 1002 | |
| 1003 unsigned start = 0; | |
| 1004 unsigned end = m_length - 1; | |
| 1005 | |
| 1006 // skip white space from start | |
| 1007 while (start <= end && | |
| 1008 predicate(is8Bit() ? characters8()[start] : characters16()[start])) | |
| 1009 ++start; | |
| 1010 | |
| 1011 // only white space | |
| 1012 if (start > end) | |
| 1013 return empty; | |
| 1014 | |
| 1015 // skip white space from end | |
| 1016 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end])) | |
| 1017 --end; | |
| 1018 | |
| 1019 if (!start && end == m_length - 1) | |
| 1020 return this; | |
| 1021 if (is8Bit()) | |
| 1022 return create(characters8() + start, end + 1 - start); | |
| 1023 return create(characters16() + start, end + 1 - start); | |
| 1024 } | |
| 1025 | |
| 1026 class UCharPredicate final { | |
| 1027 STACK_ALLOCATED(); | |
| 1028 | |
| 1029 public: | |
| 1030 inline UCharPredicate(CharacterMatchFunctionPtr function) | |
| 1031 : m_function(function) {} | |
| 1032 | |
| 1033 inline bool operator()(UChar ch) const { return m_function(ch); } | |
| 1034 | |
| 1035 private: | |
| 1036 const CharacterMatchFunctionPtr m_function; | |
| 1037 }; | |
| 1038 | |
| 1039 class SpaceOrNewlinePredicate final { | |
| 1040 STACK_ALLOCATED(); | |
| 1041 | |
| 1042 public: | |
| 1043 inline bool operator()(UChar ch) const { return isSpaceOrNewline(ch); } | |
| 1044 }; | |
| 1045 | |
| 1046 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() { | |
| 1047 return stripMatchedCharacters(SpaceOrNewlinePredicate()); | |
| 1048 } | |
| 1049 | |
| 1050 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace( | |
| 1051 IsWhiteSpaceFunctionPtr isWhiteSpace) { | |
| 1052 return stripMatchedCharacters(UCharPredicate(isWhiteSpace)); | |
| 1053 } | |
| 1054 | |
| 1055 template <typename CharType> | |
| 1056 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters( | |
| 1057 const CharType* characters, | |
| 1058 CharacterMatchFunctionPtr findMatch) { | |
| 1059 const CharType* from = characters; | |
| 1060 const CharType* fromend = from + m_length; | |
| 1061 | |
| 1062 // Assume the common case will not remove any characters | |
| 1063 while (from != fromend && !findMatch(*from)) | |
| 1064 ++from; | |
| 1065 if (from == fromend) | |
| 1066 return this; | |
| 1067 | |
| 1068 StringBuffer<CharType> data(m_length); | |
| 1069 CharType* to = data.characters(); | |
| 1070 unsigned outc = from - characters; | |
| 1071 | |
| 1072 if (outc) | |
| 1073 memcpy(to, characters, outc * sizeof(CharType)); | |
| 1074 | |
| 1075 while (true) { | |
| 1076 while (from != fromend && findMatch(*from)) | |
| 1077 ++from; | |
| 1078 while (from != fromend && !findMatch(*from)) | |
| 1079 to[outc++] = *from++; | |
| 1080 if (from == fromend) | |
| 1081 break; | |
| 1082 } | |
| 1083 | |
| 1084 data.shrink(outc); | |
| 1085 | |
| 1086 return data.release(); | |
| 1087 } | |
| 1088 | |
| 1089 PassRefPtr<StringImpl> StringImpl::removeCharacters( | |
| 1090 CharacterMatchFunctionPtr findMatch) { | |
| 1091 if (is8Bit()) | |
| 1092 return removeCharacters(characters8(), findMatch); | |
| 1093 return removeCharacters(characters16(), findMatch); | |
| 1094 } | |
| 1095 | |
| 1096 PassRefPtr<StringImpl> StringImpl::remove(unsigned start, | |
| 1097 unsigned lengthToRemove) { | |
| 1098 if (lengthToRemove <= 0) | |
| 1099 return this; | |
| 1100 if (start >= m_length) | |
| 1101 return this; | |
| 1102 | |
| 1103 lengthToRemove = std::min(m_length - start, lengthToRemove); | |
| 1104 unsigned removedEnd = start + lengthToRemove; | |
| 1105 | |
| 1106 if (is8Bit()) { | |
| 1107 StringBuffer<LChar> buffer(m_length - lengthToRemove); | |
| 1108 copyChars(buffer.characters(), characters8(), start); | |
| 1109 copyChars(buffer.characters() + start, characters8() + removedEnd, | |
| 1110 m_length - removedEnd); | |
| 1111 return buffer.release(); | |
| 1112 } | |
| 1113 StringBuffer<UChar> buffer(m_length - lengthToRemove); | |
| 1114 copyChars(buffer.characters(), characters16(), start); | |
| 1115 copyChars(buffer.characters() + start, characters16() + removedEnd, | |
| 1116 m_length - removedEnd); | |
| 1117 return buffer.release(); | |
| 1118 } | |
| 1119 | |
| 1120 template <typename CharType, class UCharPredicate> | |
| 1121 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace( | |
| 1122 UCharPredicate predicate, | |
| 1123 StripBehavior stripBehavior) { | |
| 1124 StringBuffer<CharType> data(m_length); | |
| 1125 | |
| 1126 const CharType* from = getCharacters<CharType>(); | |
| 1127 const CharType* fromend = from + m_length; | |
| 1128 int outc = 0; | |
| 1129 bool changedToSpace = false; | |
| 1130 | |
| 1131 CharType* to = data.characters(); | |
| 1132 | |
| 1133 if (stripBehavior == StripExtraWhiteSpace) { | |
| 1134 while (true) { | |
| 1135 while (from != fromend && predicate(*from)) { | |
| 1136 if (*from != ' ') | |
| 1137 changedToSpace = true; | |
| 1138 ++from; | |
| 1139 } | |
| 1140 while (from != fromend && !predicate(*from)) | |
| 1141 to[outc++] = *from++; | |
| 1142 if (from != fromend) | |
| 1143 to[outc++] = ' '; | |
| 1144 else | |
| 1145 break; | |
| 1146 } | |
| 1147 | |
| 1148 if (outc > 0 && to[outc - 1] == ' ') | |
| 1149 --outc; | |
| 1150 } else { | |
| 1151 for (; from != fromend; ++from) { | |
| 1152 if (predicate(*from)) { | |
| 1153 if (*from != ' ') | |
| 1154 changedToSpace = true; | |
| 1155 to[outc++] = ' '; | |
| 1156 } else { | |
| 1157 to[outc++] = *from; | |
| 1158 } | |
| 1159 } | |
| 1160 } | |
| 1161 | |
| 1162 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) | |
| 1163 return this; | |
| 1164 | |
| 1165 data.shrink(outc); | |
| 1166 | |
| 1167 return data.release(); | |
| 1168 } | |
| 1169 | |
| 1170 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace( | |
| 1171 StripBehavior stripBehavior) { | |
| 1172 if (is8Bit()) | |
| 1173 return StringImpl::simplifyMatchedCharactersToSpace<LChar>( | |
| 1174 SpaceOrNewlinePredicate(), stripBehavior); | |
| 1175 return StringImpl::simplifyMatchedCharactersToSpace<UChar>( | |
| 1176 SpaceOrNewlinePredicate(), stripBehavior); | |
| 1177 } | |
| 1178 | |
| 1179 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace( | |
| 1180 IsWhiteSpaceFunctionPtr isWhiteSpace, | |
| 1181 StripBehavior stripBehavior) { | |
| 1182 if (is8Bit()) | |
| 1183 return StringImpl::simplifyMatchedCharactersToSpace<LChar>( | |
| 1184 UCharPredicate(isWhiteSpace), stripBehavior); | |
| 1185 return StringImpl::simplifyMatchedCharactersToSpace<UChar>( | |
| 1186 UCharPredicate(isWhiteSpace), stripBehavior); | |
| 1187 } | |
| 1188 | |
| 1189 int StringImpl::toIntStrict(bool* ok, int base) { | |
| 1190 if (is8Bit()) | |
| 1191 return charactersToIntStrict(characters8(), m_length, ok, base); | |
| 1192 return charactersToIntStrict(characters16(), m_length, ok, base); | |
| 1193 } | |
| 1194 | |
| 1195 unsigned StringImpl::toUIntStrict(bool* ok, int base) { | |
| 1196 if (is8Bit()) | |
| 1197 return charactersToUIntStrict(characters8(), m_length, ok, base); | |
| 1198 return charactersToUIntStrict(characters16(), m_length, ok, base); | |
| 1199 } | |
| 1200 | |
| 1201 int64_t StringImpl::toInt64Strict(bool* ok, int base) { | |
| 1202 if (is8Bit()) | |
| 1203 return charactersToInt64Strict(characters8(), m_length, ok, base); | |
| 1204 return charactersToInt64Strict(characters16(), m_length, ok, base); | |
| 1205 } | |
| 1206 | |
| 1207 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) { | |
| 1208 if (is8Bit()) | |
| 1209 return charactersToUInt64Strict(characters8(), m_length, ok, base); | |
| 1210 return charactersToUInt64Strict(characters16(), m_length, ok, base); | |
| 1211 } | |
| 1212 | |
| 1213 int StringImpl::toInt(bool* ok) { | |
| 1214 if (is8Bit()) | |
| 1215 return charactersToInt(characters8(), m_length, ok); | |
| 1216 return charactersToInt(characters16(), m_length, ok); | |
| 1217 } | |
| 1218 | |
| 1219 unsigned StringImpl::toUInt(bool* ok) { | |
| 1220 if (is8Bit()) | |
| 1221 return charactersToUInt(characters8(), m_length, ok); | |
| 1222 return charactersToUInt(characters16(), m_length, ok); | |
| 1223 } | |
| 1224 | |
| 1225 int64_t StringImpl::toInt64(bool* ok) { | |
| 1226 if (is8Bit()) | |
| 1227 return charactersToInt64(characters8(), m_length, ok); | |
| 1228 return charactersToInt64(characters16(), m_length, ok); | |
| 1229 } | |
| 1230 | |
| 1231 uint64_t StringImpl::toUInt64(bool* ok) { | |
| 1232 if (is8Bit()) | |
| 1233 return charactersToUInt64(characters8(), m_length, ok); | |
| 1234 return charactersToUInt64(characters16(), m_length, ok); | |
| 1235 } | |
| 1236 | |
| 1237 double StringImpl::toDouble(bool* ok) { | |
| 1238 if (is8Bit()) | |
| 1239 return charactersToDouble(characters8(), m_length, ok); | |
| 1240 return charactersToDouble(characters16(), m_length, ok); | |
| 1241 } | |
| 1242 | |
| 1243 float StringImpl::toFloat(bool* ok) { | |
| 1244 if (is8Bit()) | |
| 1245 return charactersToFloat(characters8(), m_length, ok); | |
| 1246 return charactersToFloat(characters16(), m_length, ok); | |
| 1247 } | |
| 1248 | |
| 1249 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt | |
| 1250 const UChar StringImpl::latin1CaseFoldTable[256] = { | |
| 1251 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, | |
| 1252 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, | |
| 1253 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, | |
| 1254 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, | |
| 1255 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, | |
| 1256 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, | |
| 1257 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, | |
| 1258 0x003f, 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, | |
| 1259 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, | |
| 1260 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, | |
| 1261 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, | |
| 1262 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, | |
| 1263 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, | |
| 1264 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, | |
| 1265 0x007e, 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, | |
| 1266 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, | |
| 1267 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, | |
| 1268 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, | |
| 1269 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, | |
| 1270 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3, | |
| 1271 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, | |
| 1272 0x00bd, 0x00be, 0x00bf, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, | |
| 1273 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, | |
| 1274 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, | |
| 1275 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, 0x00e0, | |
| 1276 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, | |
| 1277 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2, | |
| 1278 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, | |
| 1279 0x00fc, 0x00fd, 0x00fe, 0x00ff, | |
| 1280 }; | |
| 1281 | |
| 1282 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) { | |
| 1283 DCHECK_GE(length, 0u); | |
| 1284 if (a == b) | |
| 1285 return true; | |
| 1286 while (length--) { | |
| 1287 if (StringImpl::latin1CaseFoldTable[*a++] != | |
| 1288 StringImpl::latin1CaseFoldTable[*b++]) | |
| 1289 return false; | |
| 1290 } | |
| 1291 return true; | |
| 1292 } | |
| 1293 | |
| 1294 bool equalIgnoringCase(const UChar* a, const UChar* b, unsigned length) { | |
| 1295 DCHECK_GE(length, 0u); | |
| 1296 if (a == b) | |
| 1297 return true; | |
| 1298 return !Unicode::umemcasecmp(a, b, length); | |
| 1299 } | |
| 1300 | |
| 1301 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) { | |
| 1302 while (length--) { | |
| 1303 if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++]) | |
| 1304 return false; | |
| 1305 } | |
| 1306 return true; | |
| 1307 } | |
| 1308 | |
| 1309 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, | |
| 1310 unsigned start) { | |
| 1311 if (is8Bit()) | |
| 1312 return WTF::find(characters8(), m_length, matchFunction, start); | |
| 1313 return WTF::find(characters16(), m_length, matchFunction, start); | |
| 1314 } | |
| 1315 | |
| 1316 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1317 ALWAYS_INLINE static size_t findInternal( | |
| 1318 const SearchCharacterType* searchCharacters, | |
| 1319 const MatchCharacterType* matchCharacters, | |
| 1320 unsigned index, | |
| 1321 unsigned searchLength, | |
| 1322 unsigned matchLength) { | |
| 1323 // Optimization: keep a running hash of the strings, | |
| 1324 // only call equal() if the hashes match. | |
| 1325 | |
| 1326 // delta is the number of additional times to test; delta == 0 means test only | |
| 1327 // once. | |
| 1328 unsigned delta = searchLength - matchLength; | |
| 1329 | |
| 1330 unsigned searchHash = 0; | |
| 1331 unsigned matchHash = 0; | |
| 1332 | |
| 1333 for (unsigned i = 0; i < matchLength; ++i) { | |
| 1334 searchHash += searchCharacters[i]; | |
| 1335 matchHash += matchCharacters[i]; | |
| 1336 } | |
| 1337 | |
| 1338 unsigned i = 0; | |
| 1339 // keep looping until we match | |
| 1340 while (searchHash != matchHash || | |
| 1341 !equal(searchCharacters + i, matchCharacters, matchLength)) { | |
| 1342 if (i == delta) | |
| 1343 return kNotFound; | |
| 1344 searchHash += searchCharacters[i + matchLength]; | |
| 1345 searchHash -= searchCharacters[i]; | |
| 1346 ++i; | |
| 1347 } | |
| 1348 return index + i; | |
| 1349 } | |
| 1350 | |
| 1351 size_t StringImpl::find(const StringView& matchString, unsigned index) { | |
| 1352 if (UNLIKELY(matchString.isNull())) | |
| 1353 return kNotFound; | |
| 1354 | |
| 1355 unsigned matchLength = matchString.length(); | |
| 1356 | |
| 1357 // Optimization 1: fast case for strings of length 1. | |
| 1358 if (matchLength == 1) { | |
| 1359 if (is8Bit()) | |
| 1360 return WTF::find(characters8(), length(), matchString[0], index); | |
| 1361 return WTF::find(characters16(), length(), matchString[0], index); | |
| 1362 } | |
| 1363 | |
| 1364 if (UNLIKELY(!matchLength)) | |
| 1365 return min(index, length()); | |
| 1366 | |
| 1367 // Check index & matchLength are in range. | |
| 1368 if (index > length()) | |
| 1369 return kNotFound; | |
| 1370 unsigned searchLength = length() - index; | |
| 1371 if (matchLength > searchLength) | |
| 1372 return kNotFound; | |
| 1373 | |
| 1374 if (is8Bit()) { | |
| 1375 if (matchString.is8Bit()) | |
| 1376 return findInternal(characters8() + index, matchString.characters8(), | |
| 1377 index, searchLength, matchLength); | |
| 1378 return findInternal(characters8() + index, matchString.characters16(), | |
| 1379 index, searchLength, matchLength); | |
| 1380 } | |
| 1381 if (matchString.is8Bit()) | |
| 1382 return findInternal(characters16() + index, matchString.characters8(), | |
| 1383 index, searchLength, matchLength); | |
| 1384 return findInternal(characters16() + index, matchString.characters16(), index, | |
| 1385 searchLength, matchLength); | |
| 1386 } | |
| 1387 | |
| 1388 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1389 ALWAYS_INLINE static size_t findIgnoringCaseInternal( | |
| 1390 const SearchCharacterType* searchCharacters, | |
| 1391 const MatchCharacterType* matchCharacters, | |
| 1392 unsigned index, | |
| 1393 unsigned searchLength, | |
| 1394 unsigned matchLength) { | |
| 1395 // delta is the number of additional times to test; delta == 0 means test only | |
| 1396 // once. | |
| 1397 unsigned delta = searchLength - matchLength; | |
| 1398 | |
| 1399 unsigned i = 0; | |
| 1400 // keep looping until we match | |
| 1401 while ( | |
| 1402 !equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) { | |
| 1403 if (i == delta) | |
| 1404 return kNotFound; | |
| 1405 ++i; | |
| 1406 } | |
| 1407 return index + i; | |
| 1408 } | |
| 1409 | |
| 1410 size_t StringImpl::findIgnoringCase(const StringView& matchString, | |
| 1411 unsigned index) { | |
| 1412 if (UNLIKELY(matchString.isNull())) | |
| 1413 return kNotFound; | |
| 1414 | |
| 1415 unsigned matchLength = matchString.length(); | |
| 1416 if (!matchLength) | |
| 1417 return min(index, length()); | |
| 1418 | |
| 1419 // Check index & matchLength are in range. | |
| 1420 if (index > length()) | |
| 1421 return kNotFound; | |
| 1422 unsigned searchLength = length() - index; | |
| 1423 if (matchLength > searchLength) | |
| 1424 return kNotFound; | |
| 1425 | |
| 1426 if (is8Bit()) { | |
| 1427 if (matchString.is8Bit()) | |
| 1428 return findIgnoringCaseInternal(characters8() + index, | |
| 1429 matchString.characters8(), index, | |
| 1430 searchLength, matchLength); | |
| 1431 return findIgnoringCaseInternal(characters8() + index, | |
| 1432 matchString.characters16(), index, | |
| 1433 searchLength, matchLength); | |
| 1434 } | |
| 1435 if (matchString.is8Bit()) | |
| 1436 return findIgnoringCaseInternal(characters16() + index, | |
| 1437 matchString.characters8(), index, | |
| 1438 searchLength, matchLength); | |
| 1439 return findIgnoringCaseInternal(characters16() + index, | |
| 1440 matchString.characters16(), index, | |
| 1441 searchLength, matchLength); | |
| 1442 } | |
| 1443 | |
| 1444 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1445 ALWAYS_INLINE static size_t findIgnoringASCIICaseInternal( | |
| 1446 const SearchCharacterType* searchCharacters, | |
| 1447 const MatchCharacterType* matchCharacters, | |
| 1448 unsigned index, | |
| 1449 unsigned searchLength, | |
| 1450 unsigned matchLength) { | |
| 1451 // delta is the number of additional times to test; delta == 0 means test only | |
| 1452 // once. | |
| 1453 unsigned delta = searchLength - matchLength; | |
| 1454 | |
| 1455 unsigned i = 0; | |
| 1456 // keep looping until we match | |
| 1457 while (!equalIgnoringASCIICase(searchCharacters + i, matchCharacters, | |
| 1458 matchLength)) { | |
| 1459 if (i == delta) | |
| 1460 return kNotFound; | |
| 1461 ++i; | |
| 1462 } | |
| 1463 return index + i; | |
| 1464 } | |
| 1465 | |
| 1466 size_t StringImpl::findIgnoringASCIICase(const StringView& matchString, | |
| 1467 unsigned index) { | |
| 1468 if (UNLIKELY(matchString.isNull())) | |
| 1469 return kNotFound; | |
| 1470 | |
| 1471 unsigned matchLength = matchString.length(); | |
| 1472 if (!matchLength) | |
| 1473 return min(index, length()); | |
| 1474 | |
| 1475 // Check index & matchLength are in range. | |
| 1476 if (index > length()) | |
| 1477 return kNotFound; | |
| 1478 unsigned searchLength = length() - index; | |
| 1479 if (matchLength > searchLength) | |
| 1480 return kNotFound; | |
| 1481 | |
| 1482 if (is8Bit()) { | |
| 1483 if (matchString.is8Bit()) | |
| 1484 return findIgnoringASCIICaseInternal(characters8() + index, | |
| 1485 matchString.characters8(), index, | |
| 1486 searchLength, matchLength); | |
| 1487 return findIgnoringASCIICaseInternal(characters8() + index, | |
| 1488 matchString.characters16(), index, | |
| 1489 searchLength, matchLength); | |
| 1490 } | |
| 1491 if (matchString.is8Bit()) | |
| 1492 return findIgnoringASCIICaseInternal(characters16() + index, | |
| 1493 matchString.characters8(), index, | |
| 1494 searchLength, matchLength); | |
| 1495 return findIgnoringASCIICaseInternal(characters16() + index, | |
| 1496 matchString.characters16(), index, | |
| 1497 searchLength, matchLength); | |
| 1498 } | |
| 1499 | |
| 1500 size_t StringImpl::reverseFind(UChar c, unsigned index) { | |
| 1501 if (is8Bit()) | |
| 1502 return WTF::reverseFind(characters8(), m_length, c, index); | |
| 1503 return WTF::reverseFind(characters16(), m_length, c, index); | |
| 1504 } | |
| 1505 | |
| 1506 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1507 ALWAYS_INLINE static size_t reverseFindInternal( | |
| 1508 const SearchCharacterType* searchCharacters, | |
| 1509 const MatchCharacterType* matchCharacters, | |
| 1510 unsigned index, | |
| 1511 unsigned length, | |
| 1512 unsigned matchLength) { | |
| 1513 // Optimization: keep a running hash of the strings, | |
| 1514 // only call equal if the hashes match. | |
| 1515 | |
| 1516 // delta is the number of additional times to test; delta == 0 means test only | |
| 1517 // once. | |
| 1518 unsigned delta = min(index, length - matchLength); | |
| 1519 | |
| 1520 unsigned searchHash = 0; | |
| 1521 unsigned matchHash = 0; | |
| 1522 for (unsigned i = 0; i < matchLength; ++i) { | |
| 1523 searchHash += searchCharacters[delta + i]; | |
| 1524 matchHash += matchCharacters[i]; | |
| 1525 } | |
| 1526 | |
| 1527 // keep looping until we match | |
| 1528 while (searchHash != matchHash || | |
| 1529 !equal(searchCharacters + delta, matchCharacters, matchLength)) { | |
| 1530 if (!delta) | |
| 1531 return kNotFound; | |
| 1532 --delta; | |
| 1533 searchHash -= searchCharacters[delta + matchLength]; | |
| 1534 searchHash += searchCharacters[delta]; | |
| 1535 } | |
| 1536 return delta; | |
| 1537 } | |
| 1538 | |
| 1539 size_t StringImpl::reverseFind(const StringView& matchString, unsigned index) { | |
| 1540 if (UNLIKELY(matchString.isNull())) | |
| 1541 return kNotFound; | |
| 1542 | |
| 1543 unsigned matchLength = matchString.length(); | |
| 1544 unsigned ourLength = length(); | |
| 1545 if (!matchLength) | |
| 1546 return min(index, ourLength); | |
| 1547 | |
| 1548 // Optimization 1: fast case for strings of length 1. | |
| 1549 if (matchLength == 1) { | |
| 1550 if (is8Bit()) | |
| 1551 return WTF::reverseFind(characters8(), ourLength, matchString[0], index); | |
| 1552 return WTF::reverseFind(characters16(), ourLength, matchString[0], index); | |
| 1553 } | |
| 1554 | |
| 1555 // Check index & matchLength are in range. | |
| 1556 if (matchLength > ourLength) | |
| 1557 return kNotFound; | |
| 1558 | |
| 1559 if (is8Bit()) { | |
| 1560 if (matchString.is8Bit()) | |
| 1561 return reverseFindInternal(characters8(), matchString.characters8(), | |
| 1562 index, ourLength, matchLength); | |
| 1563 return reverseFindInternal(characters8(), matchString.characters16(), index, | |
| 1564 ourLength, matchLength); | |
| 1565 } | |
| 1566 if (matchString.is8Bit()) | |
| 1567 return reverseFindInternal(characters16(), matchString.characters8(), index, | |
| 1568 ourLength, matchLength); | |
| 1569 return reverseFindInternal(characters16(), matchString.characters16(), index, | |
| 1570 ourLength, matchLength); | |
| 1571 } | |
| 1572 | |
| 1573 bool StringImpl::startsWith(UChar character) const { | |
| 1574 return m_length && (*this)[0] == character; | |
| 1575 } | |
| 1576 | |
| 1577 bool StringImpl::startsWith(const StringView& prefix) const { | |
| 1578 if (prefix.length() > length()) | |
| 1579 return false; | |
| 1580 if (is8Bit()) { | |
| 1581 if (prefix.is8Bit()) | |
| 1582 return equal(characters8(), prefix.characters8(), prefix.length()); | |
| 1583 return equal(characters8(), prefix.characters16(), prefix.length()); | |
| 1584 } | |
| 1585 if (prefix.is8Bit()) | |
| 1586 return equal(characters16(), prefix.characters8(), prefix.length()); | |
| 1587 return equal(characters16(), prefix.characters16(), prefix.length()); | |
| 1588 } | |
| 1589 | |
| 1590 bool StringImpl::startsWithIgnoringCase(const StringView& prefix) const { | |
| 1591 if (prefix.length() > length()) | |
| 1592 return false; | |
| 1593 if (is8Bit()) { | |
| 1594 if (prefix.is8Bit()) | |
| 1595 return equalIgnoringCase(characters8(), prefix.characters8(), | |
| 1596 prefix.length()); | |
| 1597 return equalIgnoringCase(characters8(), prefix.characters16(), | |
| 1598 prefix.length()); | |
| 1599 } | |
| 1600 if (prefix.is8Bit()) | |
| 1601 return equalIgnoringCase(characters16(), prefix.characters8(), | |
| 1602 prefix.length()); | |
| 1603 return equalIgnoringCase(characters16(), prefix.characters16(), | |
| 1604 prefix.length()); | |
| 1605 } | |
| 1606 | |
| 1607 bool StringImpl::startsWithIgnoringASCIICase(const StringView& prefix) const { | |
| 1608 if (prefix.length() > length()) | |
| 1609 return false; | |
| 1610 if (is8Bit()) { | |
| 1611 if (prefix.is8Bit()) | |
| 1612 return equalIgnoringASCIICase(characters8(), prefix.characters8(), | |
| 1613 prefix.length()); | |
| 1614 return equalIgnoringASCIICase(characters8(), prefix.characters16(), | |
| 1615 prefix.length()); | |
| 1616 } | |
| 1617 if (prefix.is8Bit()) | |
| 1618 return equalIgnoringASCIICase(characters16(), prefix.characters8(), | |
| 1619 prefix.length()); | |
| 1620 return equalIgnoringASCIICase(characters16(), prefix.characters16(), | |
| 1621 prefix.length()); | |
| 1622 } | |
| 1623 | |
| 1624 bool StringImpl::endsWith(UChar character) const { | |
| 1625 return m_length && (*this)[m_length - 1] == character; | |
| 1626 } | |
| 1627 | |
| 1628 bool StringImpl::endsWith(const StringView& suffix) const { | |
| 1629 if (suffix.length() > length()) | |
| 1630 return false; | |
| 1631 unsigned startOffset = length() - suffix.length(); | |
| 1632 if (is8Bit()) { | |
| 1633 if (suffix.is8Bit()) | |
| 1634 return equal(characters8() + startOffset, suffix.characters8(), | |
| 1635 suffix.length()); | |
| 1636 return equal(characters8() + startOffset, suffix.characters16(), | |
| 1637 suffix.length()); | |
| 1638 } | |
| 1639 if (suffix.is8Bit()) | |
| 1640 return equal(characters16() + startOffset, suffix.characters8(), | |
| 1641 suffix.length()); | |
| 1642 return equal(characters16() + startOffset, suffix.characters16(), | |
| 1643 suffix.length()); | |
| 1644 } | |
| 1645 | |
| 1646 bool StringImpl::endsWithIgnoringCase(const StringView& suffix) const { | |
| 1647 if (suffix.length() > length()) | |
| 1648 return false; | |
| 1649 unsigned startOffset = length() - suffix.length(); | |
| 1650 if (is8Bit()) { | |
| 1651 if (suffix.is8Bit()) | |
| 1652 return equalIgnoringCase(characters8() + startOffset, | |
| 1653 suffix.characters8(), suffix.length()); | |
| 1654 return equalIgnoringCase(characters8() + startOffset, suffix.characters16(), | |
| 1655 suffix.length()); | |
| 1656 } | |
| 1657 if (suffix.is8Bit()) | |
| 1658 return equalIgnoringCase(characters16() + startOffset, suffix.characters8(), | |
| 1659 suffix.length()); | |
| 1660 return equalIgnoringCase(characters16() + startOffset, suffix.characters16(), | |
| 1661 suffix.length()); | |
| 1662 } | |
| 1663 | |
| 1664 bool StringImpl::endsWithIgnoringASCIICase(const StringView& suffix) const { | |
| 1665 if (suffix.length() > length()) | |
| 1666 return false; | |
| 1667 unsigned startOffset = length() - suffix.length(); | |
| 1668 if (is8Bit()) { | |
| 1669 if (suffix.is8Bit()) | |
| 1670 return equalIgnoringASCIICase(characters8() + startOffset, | |
| 1671 suffix.characters8(), suffix.length()); | |
| 1672 return equalIgnoringASCIICase(characters8() + startOffset, | |
| 1673 suffix.characters16(), suffix.length()); | |
| 1674 } | |
| 1675 if (suffix.is8Bit()) | |
| 1676 return equalIgnoringASCIICase(characters16() + startOffset, | |
| 1677 suffix.characters8(), suffix.length()); | |
| 1678 return equalIgnoringASCIICase(characters16() + startOffset, | |
| 1679 suffix.characters16(), suffix.length()); | |
| 1680 } | |
| 1681 | |
| 1682 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) { | |
| 1683 if (oldC == newC) | |
| 1684 return this; | |
| 1685 | |
| 1686 if (find(oldC) == kNotFound) | |
| 1687 return this; | |
| 1688 | |
| 1689 unsigned i; | |
| 1690 if (is8Bit()) { | |
| 1691 if (newC <= 0xff) { | |
| 1692 LChar* data; | |
| 1693 LChar oldChar = static_cast<LChar>(oldC); | |
| 1694 LChar newChar = static_cast<LChar>(newC); | |
| 1695 | |
| 1696 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 1697 | |
| 1698 for (i = 0; i != m_length; ++i) { | |
| 1699 LChar ch = characters8()[i]; | |
| 1700 if (ch == oldChar) | |
| 1701 ch = newChar; | |
| 1702 data[i] = ch; | |
| 1703 } | |
| 1704 return newImpl.release(); | |
| 1705 } | |
| 1706 | |
| 1707 // There is the possibility we need to up convert from 8 to 16 bit, | |
| 1708 // create a 16 bit string for the result. | |
| 1709 UChar* data; | |
| 1710 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 1711 | |
| 1712 for (i = 0; i != m_length; ++i) { | |
| 1713 UChar ch = characters8()[i]; | |
| 1714 if (ch == oldC) | |
| 1715 ch = newC; | |
| 1716 data[i] = ch; | |
| 1717 } | |
| 1718 | |
| 1719 return newImpl.release(); | |
| 1720 } | |
| 1721 | |
| 1722 UChar* data; | |
| 1723 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 1724 | |
| 1725 for (i = 0; i != m_length; ++i) { | |
| 1726 UChar ch = characters16()[i]; | |
| 1727 if (ch == oldC) | |
| 1728 ch = newC; | |
| 1729 data[i] = ch; | |
| 1730 } | |
| 1731 return newImpl.release(); | |
| 1732 } | |
| 1733 | |
| 1734 // TODO(esprehn): Passing a null replacement is the same as empty string for | |
| 1735 // this method but all others treat null as a no-op. We should choose one | |
| 1736 // behavior. | |
| 1737 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, | |
| 1738 unsigned lengthToReplace, | |
| 1739 const StringView& string) { | |
| 1740 position = min(position, length()); | |
| 1741 lengthToReplace = min(lengthToReplace, length() - position); | |
| 1742 unsigned lengthToInsert = string.length(); | |
| 1743 if (!lengthToReplace && !lengthToInsert) | |
| 1744 return this; | |
| 1745 | |
| 1746 RELEASE_ASSERT((length() - lengthToReplace) < | |
| 1747 (numeric_limits<unsigned>::max() - lengthToInsert)); | |
| 1748 | |
| 1749 if (is8Bit() && (string.isNull() || string.is8Bit())) { | |
| 1750 LChar* data; | |
| 1751 RefPtr<StringImpl> newImpl = | |
| 1752 createUninitialized(length() - lengthToReplace + lengthToInsert, data); | |
| 1753 memcpy(data, characters8(), position * sizeof(LChar)); | |
| 1754 if (!string.isNull()) | |
| 1755 memcpy(data + position, string.characters8(), | |
| 1756 lengthToInsert * sizeof(LChar)); | |
| 1757 memcpy(data + position + lengthToInsert, | |
| 1758 characters8() + position + lengthToReplace, | |
| 1759 (length() - position - lengthToReplace) * sizeof(LChar)); | |
| 1760 return newImpl.release(); | |
| 1761 } | |
| 1762 UChar* data; | |
| 1763 RefPtr<StringImpl> newImpl = | |
| 1764 createUninitialized(length() - lengthToReplace + lengthToInsert, data); | |
| 1765 if (is8Bit()) | |
| 1766 for (unsigned i = 0; i < position; ++i) | |
| 1767 data[i] = characters8()[i]; | |
| 1768 else | |
| 1769 memcpy(data, characters16(), position * sizeof(UChar)); | |
| 1770 if (!string.isNull()) { | |
| 1771 if (string.is8Bit()) | |
| 1772 for (unsigned i = 0; i < lengthToInsert; ++i) | |
| 1773 data[i + position] = string.characters8()[i]; | |
| 1774 else | |
| 1775 memcpy(data + position, string.characters16(), | |
| 1776 lengthToInsert * sizeof(UChar)); | |
| 1777 } | |
| 1778 if (is8Bit()) { | |
| 1779 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i) | |
| 1780 data[i + position + lengthToInsert] = | |
| 1781 characters8()[i + position + lengthToReplace]; | |
| 1782 } else { | |
| 1783 memcpy(data + position + lengthToInsert, | |
| 1784 characters16() + position + lengthToReplace, | |
| 1785 (length() - position - lengthToReplace) * sizeof(UChar)); | |
| 1786 } | |
| 1787 return newImpl.release(); | |
| 1788 } | |
| 1789 | |
| 1790 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, | |
| 1791 const StringView& replacement) { | |
| 1792 if (replacement.isNull()) | |
| 1793 return this; | |
| 1794 if (replacement.is8Bit()) | |
| 1795 return replace(pattern, replacement.characters8(), replacement.length()); | |
| 1796 return replace(pattern, replacement.characters16(), replacement.length()); | |
| 1797 } | |
| 1798 | |
| 1799 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, | |
| 1800 const LChar* replacement, | |
| 1801 unsigned repStrLength) { | |
| 1802 DCHECK(replacement); | |
| 1803 | |
| 1804 size_t srcSegmentStart = 0; | |
| 1805 unsigned matchCount = 0; | |
| 1806 | |
| 1807 // Count the matches. | |
| 1808 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 1809 ++matchCount; | |
| 1810 ++srcSegmentStart; | |
| 1811 } | |
| 1812 | |
| 1813 // If we have 0 matches then we don't have to do any more work. | |
| 1814 if (!matchCount) | |
| 1815 return this; | |
| 1816 | |
| 1817 RELEASE_ASSERT(!repStrLength || | |
| 1818 matchCount <= numeric_limits<unsigned>::max() / repStrLength); | |
| 1819 | |
| 1820 unsigned replaceSize = matchCount * repStrLength; | |
| 1821 unsigned newSize = m_length - matchCount; | |
| 1822 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); | |
| 1823 | |
| 1824 newSize += replaceSize; | |
| 1825 | |
| 1826 // Construct the new data. | |
| 1827 size_t srcSegmentEnd; | |
| 1828 unsigned srcSegmentLength; | |
| 1829 srcSegmentStart = 0; | |
| 1830 unsigned dstOffset = 0; | |
| 1831 | |
| 1832 if (is8Bit()) { | |
| 1833 LChar* data; | |
| 1834 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 1835 | |
| 1836 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 1837 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 1838 memcpy(data + dstOffset, characters8() + srcSegmentStart, | |
| 1839 srcSegmentLength * sizeof(LChar)); | |
| 1840 dstOffset += srcSegmentLength; | |
| 1841 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar)); | |
| 1842 dstOffset += repStrLength; | |
| 1843 srcSegmentStart = srcSegmentEnd + 1; | |
| 1844 } | |
| 1845 | |
| 1846 srcSegmentLength = m_length - srcSegmentStart; | |
| 1847 memcpy(data + dstOffset, characters8() + srcSegmentStart, | |
| 1848 srcSegmentLength * sizeof(LChar)); | |
| 1849 | |
| 1850 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length()); | |
| 1851 | |
| 1852 return newImpl.release(); | |
| 1853 } | |
| 1854 | |
| 1855 UChar* data; | |
| 1856 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 1857 | |
| 1858 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 1859 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 1860 memcpy(data + dstOffset, characters16() + srcSegmentStart, | |
| 1861 srcSegmentLength * sizeof(UChar)); | |
| 1862 | |
| 1863 dstOffset += srcSegmentLength; | |
| 1864 for (unsigned i = 0; i < repStrLength; ++i) | |
| 1865 data[i + dstOffset] = replacement[i]; | |
| 1866 | |
| 1867 dstOffset += repStrLength; | |
| 1868 srcSegmentStart = srcSegmentEnd + 1; | |
| 1869 } | |
| 1870 | |
| 1871 srcSegmentLength = m_length - srcSegmentStart; | |
| 1872 memcpy(data + dstOffset, characters16() + srcSegmentStart, | |
| 1873 srcSegmentLength * sizeof(UChar)); | |
| 1874 | |
| 1875 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length()); | |
| 1876 | |
| 1877 return newImpl.release(); | |
| 1878 } | |
| 1879 | |
| 1880 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, | |
| 1881 const UChar* replacement, | |
| 1882 unsigned repStrLength) { | |
| 1883 DCHECK(replacement); | |
| 1884 | |
| 1885 size_t srcSegmentStart = 0; | |
| 1886 unsigned matchCount = 0; | |
| 1887 | |
| 1888 // Count the matches. | |
| 1889 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 1890 ++matchCount; | |
| 1891 ++srcSegmentStart; | |
| 1892 } | |
| 1893 | |
| 1894 // If we have 0 matches then we don't have to do any more work. | |
| 1895 if (!matchCount) | |
| 1896 return this; | |
| 1897 | |
| 1898 RELEASE_ASSERT(!repStrLength || | |
| 1899 matchCount <= numeric_limits<unsigned>::max() / repStrLength); | |
| 1900 | |
| 1901 unsigned replaceSize = matchCount * repStrLength; | |
| 1902 unsigned newSize = m_length - matchCount; | |
| 1903 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); | |
| 1904 | |
| 1905 newSize += replaceSize; | |
| 1906 | |
| 1907 // Construct the new data. | |
| 1908 size_t srcSegmentEnd; | |
| 1909 unsigned srcSegmentLength; | |
| 1910 srcSegmentStart = 0; | |
| 1911 unsigned dstOffset = 0; | |
| 1912 | |
| 1913 if (is8Bit()) { | |
| 1914 UChar* data; | |
| 1915 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 1916 | |
| 1917 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 1918 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 1919 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
| 1920 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | |
| 1921 | |
| 1922 dstOffset += srcSegmentLength; | |
| 1923 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); | |
| 1924 | |
| 1925 dstOffset += repStrLength; | |
| 1926 srcSegmentStart = srcSegmentEnd + 1; | |
| 1927 } | |
| 1928 | |
| 1929 srcSegmentLength = m_length - srcSegmentStart; | |
| 1930 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
| 1931 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | |
| 1932 | |
| 1933 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length()); | |
| 1934 | |
| 1935 return newImpl.release(); | |
| 1936 } | |
| 1937 | |
| 1938 UChar* data; | |
| 1939 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 1940 | |
| 1941 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 1942 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 1943 memcpy(data + dstOffset, characters16() + srcSegmentStart, | |
| 1944 srcSegmentLength * sizeof(UChar)); | |
| 1945 | |
| 1946 dstOffset += srcSegmentLength; | |
| 1947 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); | |
| 1948 | |
| 1949 dstOffset += repStrLength; | |
| 1950 srcSegmentStart = srcSegmentEnd + 1; | |
| 1951 } | |
| 1952 | |
| 1953 srcSegmentLength = m_length - srcSegmentStart; | |
| 1954 memcpy(data + dstOffset, characters16() + srcSegmentStart, | |
| 1955 srcSegmentLength * sizeof(UChar)); | |
| 1956 | |
| 1957 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length()); | |
| 1958 | |
| 1959 return newImpl.release(); | |
| 1960 } | |
| 1961 | |
| 1962 PassRefPtr<StringImpl> StringImpl::replace(const StringView& pattern, | |
| 1963 const StringView& replacement) { | |
| 1964 if (pattern.isNull() || replacement.isNull()) | |
| 1965 return this; | |
| 1966 | |
| 1967 unsigned patternLength = pattern.length(); | |
| 1968 if (!patternLength) | |
| 1969 return this; | |
| 1970 | |
| 1971 unsigned repStrLength = replacement.length(); | |
| 1972 size_t srcSegmentStart = 0; | |
| 1973 unsigned matchCount = 0; | |
| 1974 | |
| 1975 // Count the matches. | |
| 1976 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 1977 ++matchCount; | |
| 1978 srcSegmentStart += patternLength; | |
| 1979 } | |
| 1980 | |
| 1981 // If we have 0 matches, we don't have to do any more work | |
| 1982 if (!matchCount) | |
| 1983 return this; | |
| 1984 | |
| 1985 unsigned newSize = m_length - matchCount * patternLength; | |
| 1986 RELEASE_ASSERT(!repStrLength || | |
| 1987 matchCount <= numeric_limits<unsigned>::max() / repStrLength); | |
| 1988 | |
| 1989 RELEASE_ASSERT(newSize <= | |
| 1990 (numeric_limits<unsigned>::max() - matchCount * repStrLength)); | |
| 1991 | |
| 1992 newSize += matchCount * repStrLength; | |
| 1993 | |
| 1994 // Construct the new data | |
| 1995 size_t srcSegmentEnd; | |
| 1996 unsigned srcSegmentLength; | |
| 1997 srcSegmentStart = 0; | |
| 1998 unsigned dstOffset = 0; | |
| 1999 bool srcIs8Bit = is8Bit(); | |
| 2000 bool replacementIs8Bit = replacement.is8Bit(); | |
| 2001 | |
| 2002 // There are 4 cases: | |
| 2003 // 1. This and replacement are both 8 bit. | |
| 2004 // 2. This and replacement are both 16 bit. | |
| 2005 // 3. This is 8 bit and replacement is 16 bit. | |
| 2006 // 4. This is 16 bit and replacement is 8 bit. | |
| 2007 if (srcIs8Bit && replacementIs8Bit) { | |
| 2008 // Case 1 | |
| 2009 LChar* data; | |
| 2010 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 2011 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 2012 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 2013 memcpy(data + dstOffset, characters8() + srcSegmentStart, | |
| 2014 srcSegmentLength * sizeof(LChar)); | |
| 2015 dstOffset += srcSegmentLength; | |
| 2016 memcpy(data + dstOffset, replacement.characters8(), | |
| 2017 repStrLength * sizeof(LChar)); | |
| 2018 dstOffset += repStrLength; | |
| 2019 srcSegmentStart = srcSegmentEnd + patternLength; | |
| 2020 } | |
| 2021 | |
| 2022 srcSegmentLength = m_length - srcSegmentStart; | |
| 2023 memcpy(data + dstOffset, characters8() + srcSegmentStart, | |
| 2024 srcSegmentLength * sizeof(LChar)); | |
| 2025 | |
| 2026 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length()); | |
| 2027 | |
| 2028 return newImpl.release(); | |
| 2029 } | |
| 2030 | |
| 2031 UChar* data; | |
| 2032 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 2033 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 2034 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 2035 if (srcIs8Bit) { | |
| 2036 // Case 3. | |
| 2037 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
| 2038 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | |
| 2039 } else { | |
| 2040 // Case 2 & 4. | |
| 2041 memcpy(data + dstOffset, characters16() + srcSegmentStart, | |
| 2042 srcSegmentLength * sizeof(UChar)); | |
| 2043 } | |
| 2044 dstOffset += srcSegmentLength; | |
| 2045 if (replacementIs8Bit) { | |
| 2046 // Cases 2 & 3. | |
| 2047 for (unsigned i = 0; i < repStrLength; ++i) | |
| 2048 data[i + dstOffset] = replacement.characters8()[i]; | |
| 2049 } else { | |
| 2050 // Case 4 | |
| 2051 memcpy(data + dstOffset, replacement.characters16(), | |
| 2052 repStrLength * sizeof(UChar)); | |
| 2053 } | |
| 2054 dstOffset += repStrLength; | |
| 2055 srcSegmentStart = srcSegmentEnd + patternLength; | |
| 2056 } | |
| 2057 | |
| 2058 srcSegmentLength = m_length - srcSegmentStart; | |
| 2059 if (srcIs8Bit) { | |
| 2060 // Case 3. | |
| 2061 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
| 2062 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | |
| 2063 } else { | |
| 2064 // Cases 2 & 4. | |
| 2065 memcpy(data + dstOffset, characters16() + srcSegmentStart, | |
| 2066 srcSegmentLength * sizeof(UChar)); | |
| 2067 } | |
| 2068 | |
| 2069 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length()); | |
| 2070 | |
| 2071 return newImpl.release(); | |
| 2072 } | |
| 2073 | |
| 2074 PassRefPtr<StringImpl> StringImpl::upconvertedString() { | |
| 2075 if (is8Bit()) | |
| 2076 return String::make16BitFrom8BitSource(characters8(), m_length) | |
| 2077 .releaseImpl(); | |
| 2078 return this; | |
| 2079 } | |
| 2080 | |
| 2081 static inline bool stringImplContentEqual(const StringImpl* a, | |
| 2082 const StringImpl* b) { | |
| 2083 unsigned aLength = a->length(); | |
| 2084 unsigned bLength = b->length(); | |
| 2085 if (aLength != bLength) | |
| 2086 return false; | |
| 2087 | |
| 2088 if (a->is8Bit()) { | |
| 2089 if (b->is8Bit()) | |
| 2090 return equal(a->characters8(), b->characters8(), aLength); | |
| 2091 | |
| 2092 return equal(a->characters8(), b->characters16(), aLength); | |
| 2093 } | |
| 2094 | |
| 2095 if (b->is8Bit()) | |
| 2096 return equal(a->characters16(), b->characters8(), aLength); | |
| 2097 | |
| 2098 return equal(a->characters16(), b->characters16(), aLength); | |
| 2099 } | |
| 2100 | |
| 2101 bool equal(const StringImpl* a, const StringImpl* b) { | |
| 2102 if (a == b) | |
| 2103 return true; | |
| 2104 if (!a || !b) | |
| 2105 return false; | |
| 2106 if (a->isAtomic() && b->isAtomic()) | |
| 2107 return false; | |
| 2108 | |
| 2109 return stringImplContentEqual(a, b); | |
| 2110 } | |
| 2111 | |
| 2112 template <typename CharType> | |
| 2113 inline bool equalInternal(const StringImpl* a, | |
| 2114 const CharType* b, | |
| 2115 unsigned length) { | |
| 2116 if (!a) | |
| 2117 return !b; | |
| 2118 if (!b) | |
| 2119 return false; | |
| 2120 | |
| 2121 if (a->length() != length) | |
| 2122 return false; | |
| 2123 if (a->is8Bit()) | |
| 2124 return equal(a->characters8(), b, length); | |
| 2125 return equal(a->characters16(), b, length); | |
| 2126 } | |
| 2127 | |
| 2128 bool equal(const StringImpl* a, const LChar* b, unsigned length) { | |
| 2129 return equalInternal(a, b, length); | |
| 2130 } | |
| 2131 | |
| 2132 bool equal(const StringImpl* a, const UChar* b, unsigned length) { | |
| 2133 return equalInternal(a, b, length); | |
| 2134 } | |
| 2135 | |
| 2136 bool equal(const StringImpl* a, const LChar* b) { | |
| 2137 if (!a) | |
| 2138 return !b; | |
| 2139 if (!b) | |
| 2140 return !a; | |
| 2141 | |
| 2142 unsigned length = a->length(); | |
| 2143 | |
| 2144 if (a->is8Bit()) { | |
| 2145 const LChar* aPtr = a->characters8(); | |
| 2146 for (unsigned i = 0; i != length; ++i) { | |
| 2147 LChar bc = b[i]; | |
| 2148 LChar ac = aPtr[i]; | |
| 2149 if (!bc) | |
| 2150 return false; | |
| 2151 if (ac != bc) | |
| 2152 return false; | |
| 2153 } | |
| 2154 | |
| 2155 return !b[length]; | |
| 2156 } | |
| 2157 | |
| 2158 const UChar* aPtr = a->characters16(); | |
| 2159 for (unsigned i = 0; i != length; ++i) { | |
| 2160 LChar bc = b[i]; | |
| 2161 if (!bc) | |
| 2162 return false; | |
| 2163 if (aPtr[i] != bc) | |
| 2164 return false; | |
| 2165 } | |
| 2166 | |
| 2167 return !b[length]; | |
| 2168 } | |
| 2169 | |
| 2170 bool equalNonNull(const StringImpl* a, const StringImpl* b) { | |
| 2171 DCHECK(a); | |
| 2172 DCHECK(b); | |
| 2173 if (a == b) | |
| 2174 return true; | |
| 2175 | |
| 2176 return stringImplContentEqual(a, b); | |
| 2177 } | |
| 2178 | |
| 2179 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) { | |
| 2180 if (!a && b && !b->length()) | |
| 2181 return true; | |
| 2182 if (!b && a && !a->length()) | |
| 2183 return true; | |
| 2184 return equal(a, b); | |
| 2185 } | |
| 2186 | |
// Three-way comparison of two character sequences after passing each
// character through toASCIILower(). |l1| and |l2| are the lengths of |c1| and
// |c2|. Returns -1, 0 or 1: the first differing character decides the result;
// if one sequence is a prefix of the other, the shorter one orders first.
template <typename CharacterType1, typename CharacterType2>
int codePointCompareIgnoringASCIICase(unsigned l1,
                                      unsigned l2,
                                      const CharacterType1* c1,
                                      const CharacterType2* c2) {
  const unsigned commonLength = l1 < l2 ? l1 : l2;

  for (unsigned index = 0; index < commonLength; ++index) {
    if (toASCIILower(c1[index]) != toASCIILower(c2[index]))
      return (toASCIILower(c1[index]) > toASCIILower(c2[index])) ? 1 : -1;
  }

  // All shared positions match, so the lengths decide.
  if (l1 == l2)
    return 0;
  return (l1 > l2) ? 1 : -1;
}
| 2208 | |
| 2209 int codePointCompareIgnoringASCIICase(const StringImpl* string1, | |
| 2210 const LChar* string2) { | |
| 2211 unsigned length1 = string1 ? string1->length() : 0; | |
| 2212 size_t length2 = string2 ? strlen(reinterpret_cast<const char*>(string2)) : 0; | |
| 2213 | |
| 2214 if (!string1) | |
| 2215 return length2 > 0 ? -1 : 0; | |
| 2216 | |
| 2217 if (!string2) | |
| 2218 return length1 > 0 ? 1 : 0; | |
| 2219 | |
| 2220 if (string1->is8Bit()) | |
| 2221 return codePointCompareIgnoringASCIICase(length1, length2, | |
| 2222 string1->characters8(), string2); | |
| 2223 return codePointCompareIgnoringASCIICase(length1, length2, | |
| 2224 string1->characters16(), string2); | |
| 2225 } | |
| 2226 | |
| 2227 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) { | |
| 2228 if (!localeIdentifier.isNull()) { | |
| 2229 if (localeIdMatchesLang(localeIdentifier, "tr") || | |
| 2230 localeIdMatchesLang(localeIdentifier, "az")) { | |
| 2231 if (c == 'i') | |
| 2232 return latinCapitalLetterIWithDotAbove; | |
| 2233 if (c == latinSmallLetterDotlessI) | |
| 2234 return 'I'; | |
| 2235 } else if (localeIdMatchesLang(localeIdentifier, "lt")) { | |
| 2236 // TODO(rob.buis) implement upper-casing rules for lt | |
| 2237 // like in StringImpl::upper(locale). | |
| 2238 } | |
| 2239 } | |
| 2240 | |
| 2241 return toUpper(c); | |
| 2242 } | |
| 2243 | |
| 2244 } // namespace WTF | |
| OLD | NEW |