| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | |
| 3 * (C) 1999 Antti Koivisto (koivisto@kde.org) | |
| 4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) | |
| 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All r
ights reserved. | |
| 6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) | |
| 7 * | |
| 8 * This library is free software; you can redistribute it and/or | |
| 9 * modify it under the terms of the GNU Library General Public | |
| 10 * License as published by the Free Software Foundation; either | |
| 11 * version 2 of the License, or (at your option) any later version. | |
| 12 * | |
| 13 * This library is distributed in the hope that it will be useful, | |
| 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 16 * Library General Public License for more details. | |
| 17 * | |
| 18 * You should have received a copy of the GNU Library General Public License | |
| 19 * along with this library; see the file COPYING.LIB. If not, write to | |
| 20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
| 21 * Boston, MA 02110-1301, USA. | |
| 22 * | |
| 23 */ | |
| 24 | |
| 25 #include "config.h" | |
| 26 #include "StringImpl.h" | |
| 27 | |
| 28 #include "AtomicString.h" | |
| 29 #include "StringBuffer.h" | |
| 30 #include "StringHash.h" | |
| 31 #include <wtf/ProcessID.h> | |
| 32 #include <wtf/StdLibExtras.h> | |
| 33 #include <wtf/WTFThreadData.h> | |
| 34 #include <wtf/unicode/CharacterNames.h> | |
| 35 | |
| 36 #ifdef STRING_STATS | |
| 37 #include <unistd.h> | |
| 38 #include <wtf/DataLog.h> | |
| 39 #endif | |
| 40 | |
| 41 using namespace std; | |
| 42 | |
| 43 namespace WTF { | |
| 44 | |
| 45 using namespace Unicode; | |
| 46 | |
// Compile-time guard on the object layout: the build fails unless StringImpl
// occupies exactly two ints plus three pointers.
COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), StringImpl_should_stay_small);
| 48 | |
| 49 #ifdef STRING_STATS | |
// Out-of-line definition of the statistics object held by StringImpl.
StringStats StringImpl::m_stringStats;

// Countdown of removeString() calls left before the next printStats() dump;
// it starts at, and is reset to, s_printStringStatsFrequency.
unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency;
| 53 | |
| 54 void StringStats::removeString(StringImpl* string) | |
| 55 { | |
| 56 unsigned length = string->length(); | |
| 57 bool isSubString = string->isSubString(); | |
| 58 | |
| 59 --m_totalNumberStrings; | |
| 60 | |
| 61 if (string->has16BitShadow()) { | |
| 62 --m_numberUpconvertedStrings; | |
| 63 if (!isSubString) | |
| 64 m_totalUpconvertedData -= length; | |
| 65 } | |
| 66 | |
| 67 if (string->is8Bit()) { | |
| 68 --m_number8BitStrings; | |
| 69 if (!isSubString) | |
| 70 m_total8BitData -= length; | |
| 71 } else { | |
| 72 --m_number16BitStrings; | |
| 73 if (!isSubString) | |
| 74 m_total16BitData -= length; | |
| 75 } | |
| 76 | |
| 77 if (!--s_stringRemovesTillPrintStats) { | |
| 78 s_stringRemovesTillPrintStats = s_printStringStatsFrequency; | |
| 79 printStats(); | |
| 80 } | |
| 81 } | |
| 82 | |
| 83 void StringStats::printStats() | |
| 84 { | |
| 85 dataLogF("String stats for process id %d:\n", getCurrentProcessID()); | |
| 86 | |
| 87 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitDat
a; | |
| 88 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 1
00) / (double)m_totalNumberStrings : 0.0; | |
| 89 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (
double)m_number8BitStrings : 0.0; | |
| 90 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length
%6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, av
erage8bitLength); | |
| 91 | |
| 92 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings *
100) / (double)m_totalNumberStrings : 0.0; | |
| 93 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData
/ (double)m_number16BitStrings : 0.0; | |
| 94 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length
%6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData
* 2, average16bitLength); | |
| 95 | |
| 96 double percentUpconverted = m_totalNumberStrings ? ((double)m_numberUpconver
tedStrings * 100) / (double)m_number8BitStrings : 0.0; | |
| 97 double averageUpconvertedLength = m_numberUpconvertedStrings ? (double)m_tot
alUpconvertedData / (double)m_numberUpconvertedStrings : 0.0; | |
| 98 dataLogF("%8u (%5.2f%%) upconverted %12llu chars %12llu bytes avg length
%6.1f\n", m_numberUpconvertedStrings, percentUpconverted, m_totalUpconvertedData
, m_totalUpconvertedData * 2, averageUpconvertedLength); | |
| 99 | |
| 100 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters
/ (double)m_totalNumberStrings : 0.0; | |
| 101 unsigned long long totalDataBytes = m_total8BitData + (m_total16BitData + m_
totalUpconvertedData) * 2; | |
| 102 dataLogF("%8u Total %12llu chars %12llu bytes avg length %
6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLen
gth); | |
| 103 unsigned long long totalSavedBytes = m_total8BitData - m_totalUpconvertedDat
a; | |
| 104 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) /
(double)(totalDataBytes + totalSavedBytes) : 0.0; | |
| 105 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes,
percentSavings); | |
| 106 } | |
| 107 #endif | |
| 108 | |
| 109 | |
| 110 StringImpl::~StringImpl() | |
| 111 { | |
| 112 ASSERT(!isStatic()); | |
| 113 | |
| 114 STRING_STATS_REMOVE_STRING(this); | |
| 115 | |
| 116 if (isAtomic()) | |
| 117 AtomicString::remove(this); | |
| 118 | |
| 119 BufferOwnership ownership = bufferOwnership(); | |
| 120 | |
| 121 if (has16BitShadow()) { | |
| 122 ASSERT(m_copyData16); | |
| 123 fastFree(m_copyData16); | |
| 124 } | |
| 125 | |
| 126 if (ownership == BufferInternal) | |
| 127 return; | |
| 128 if (ownership == BufferOwned) { | |
| 129 // We use m_data8, but since it is a union with m_data16 this works eith
er way. | |
| 130 ASSERT(m_data8); | |
| 131 fastFree(const_cast<LChar*>(m_data8)); | |
| 132 return; | |
| 133 } | |
| 134 ASSERT(ownership == BufferSubstring); | |
| 135 ASSERT(m_substringBuffer); | |
| 136 m_substringBuffer->deref(); | |
| 137 } | |
| 138 | |
| 139 PassRefPtr<StringImpl> StringImpl::createFromLiteral(const char* characters, uns
igned length) | |
| 140 { | |
| 141 ASSERT_WITH_MESSAGE(length, "Use StringImpl::empty() to create an empty stri
ng"); | |
| 142 ASSERT(charactersAreAllASCII<LChar>(reinterpret_cast<const LChar*>(character
s), length)); | |
| 143 return adoptRef(new StringImpl(characters, length, ConstructFromLiteral)); | |
| 144 } | |
| 145 | |
| 146 PassRefPtr<StringImpl> StringImpl::createFromLiteral(const char* characters) | |
| 147 { | |
| 148 size_t length = strlen(characters); | |
| 149 ASSERT_WITH_MESSAGE(length, "Use StringImpl::empty() to create an empty stri
ng"); | |
| 150 ASSERT(charactersAreAllASCII<LChar>(reinterpret_cast<const LChar*>(character
s), length)); | |
| 151 return adoptRef(new StringImpl(characters, length, ConstructFromLiteral)); | |
| 152 } | |
| 153 | |
| 154 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*&
data) | |
| 155 { | |
| 156 if (!length) { | |
| 157 data = 0; | |
| 158 return empty(); | |
| 159 } | |
| 160 | |
| 161 // Allocate a single buffer large enough to contain the StringImpl | |
| 162 // struct as well as the data which it contains. This removes one | |
| 163 // heap allocation from this call. | |
| 164 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str
ingImpl)) / sizeof(LChar))); | |
| 165 size_t size = sizeof(StringImpl) + length * sizeof(LChar); | |
| 166 StringImpl* string = static_cast<StringImpl*>(fastMalloc(size)); | |
| 167 | |
| 168 data = reinterpret_cast<LChar*>(string + 1); | |
| 169 return adoptRef(new (NotNull, string) StringImpl(length, Force8BitConstructo
r)); | |
| 170 } | |
| 171 | |
| 172 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*&
data) | |
| 173 { | |
| 174 if (!length) { | |
| 175 data = 0; | |
| 176 return empty(); | |
| 177 } | |
| 178 | |
| 179 // Allocate a single buffer large enough to contain the StringImpl | |
| 180 // struct as well as the data which it contains. This removes one | |
| 181 // heap allocation from this call. | |
| 182 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str
ingImpl)) / sizeof(UChar))); | |
| 183 size_t size = sizeof(StringImpl) + length * sizeof(UChar); | |
| 184 StringImpl* string = static_cast<StringImpl*>(fastMalloc(size)); | |
| 185 | |
| 186 data = reinterpret_cast<UChar*>(string + 1); | |
| 187 return adoptRef(new (NotNull, string) StringImpl(length)); | |
| 188 } | |
| 189 | |
| 190 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalStr
ing, unsigned length, LChar*& data) | |
| 191 { | |
| 192 ASSERT(originalString->is8Bit()); | |
| 193 ASSERT(originalString->hasOneRef()); | |
| 194 ASSERT(originalString->bufferOwnership() == BufferInternal); | |
| 195 | |
| 196 if (!length) { | |
| 197 data = 0; | |
| 198 return empty(); | |
| 199 } | |
| 200 | |
| 201 // Same as createUninitialized() except here we use fastRealloc. | |
| 202 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str
ingImpl)) / sizeof(LChar))); | |
| 203 size_t size = sizeof(StringImpl) + length * sizeof(LChar); | |
| 204 originalString->~StringImpl(); | |
| 205 StringImpl* string = static_cast<StringImpl*>(fastRealloc(originalString.lea
kRef(), size)); | |
| 206 | |
| 207 data = reinterpret_cast<LChar*>(string + 1); | |
| 208 return adoptRef(new (NotNull, string) StringImpl(length, Force8BitConstructo
r)); | |
| 209 } | |
| 210 | |
| 211 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalStr
ing, unsigned length, UChar*& data) | |
| 212 { | |
| 213 ASSERT(!originalString->is8Bit()); | |
| 214 ASSERT(originalString->hasOneRef()); | |
| 215 ASSERT(originalString->bufferOwnership() == BufferInternal); | |
| 216 | |
| 217 if (!length) { | |
| 218 data = 0; | |
| 219 return empty(); | |
| 220 } | |
| 221 | |
| 222 // Same as createUninitialized() except here we use fastRealloc. | |
| 223 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str
ingImpl)) / sizeof(UChar))); | |
| 224 size_t size = sizeof(StringImpl) + length * sizeof(UChar); | |
| 225 originalString->~StringImpl(); | |
| 226 StringImpl* string = static_cast<StringImpl*>(fastRealloc(originalString.lea
kRef(), size)); | |
| 227 | |
| 228 data = reinterpret_cast<UChar*>(string + 1); | |
| 229 return adoptRef(new (NotNull, string) StringImpl(length)); | |
| 230 } | |
| 231 | |
| 232 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned leng
th) | |
| 233 { | |
| 234 if (!characters || !length) | |
| 235 return empty(); | |
| 236 | |
| 237 UChar* data; | |
| 238 RefPtr<StringImpl> string = createUninitialized(length, data); | |
| 239 memcpy(data, characters, length * sizeof(UChar)); | |
| 240 return string.release(); | |
| 241 } | |
| 242 | |
| 243 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned leng
th) | |
| 244 { | |
| 245 if (!characters || !length) | |
| 246 return empty(); | |
| 247 | |
| 248 LChar* data; | |
| 249 RefPtr<StringImpl> string = createUninitialized(length, data); | |
| 250 memcpy(data, characters, length * sizeof(LChar)); | |
| 251 return string.release(); | |
| 252 } | |
| 253 | |
| 254 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters,
unsigned length) | |
| 255 { | |
| 256 if (!characters || !length) | |
| 257 return empty(); | |
| 258 | |
| 259 LChar* data; | |
| 260 RefPtr<StringImpl> string = createUninitialized(length, data); | |
| 261 | |
| 262 for (size_t i = 0; i < length; ++i) { | |
| 263 if (characters[i] & 0xff00) | |
| 264 return create(characters, length); | |
| 265 data[i] = static_cast<LChar>(characters[i]); | |
| 266 } | |
| 267 | |
| 268 return string.release(); | |
| 269 } | |
| 270 | |
| 271 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) | |
| 272 { | |
| 273 if (!string) | |
| 274 return empty(); | |
| 275 size_t length = strlen(reinterpret_cast<const char*>(string)); | |
| 276 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); | |
| 277 return create(string, length); | |
| 278 } | |
| 279 | |
| 280 const UChar* StringImpl::getData16SlowCase() const | |
| 281 { | |
| 282 if (has16BitShadow()) | |
| 283 return m_copyData16; | |
| 284 | |
| 285 if (bufferOwnership() == BufferSubstring) { | |
| 286 // If this is a substring, return a pointer into the parent string. | |
| 287 // TODO: Consider severing this string from the parent string | |
| 288 unsigned offset = m_data8 - m_substringBuffer->characters8(); | |
| 289 return m_substringBuffer->characters() + offset; | |
| 290 } | |
| 291 | |
| 292 STRING_STATS_ADD_UPCONVERTED_STRING(m_length); | |
| 293 | |
| 294 unsigned len = length(); | |
| 295 if (hasTerminatingNullCharacter()) | |
| 296 ++len; | |
| 297 | |
| 298 m_copyData16 = static_cast<UChar*>(fastMalloc(len * sizeof(UChar))); | |
| 299 | |
| 300 m_hashAndFlags |= s_hashFlagHas16BitShadow; | |
| 301 | |
| 302 upconvertCharacters(0, len); | |
| 303 | |
| 304 return m_copyData16; | |
| 305 } | |
| 306 | |
| 307 void StringImpl::upconvertCharacters(unsigned start, unsigned end) const | |
| 308 { | |
| 309 ASSERT(is8Bit()); | |
| 310 ASSERT(has16BitShadow()); | |
| 311 | |
| 312 for (size_t i = start; i < end; ++i) | |
| 313 m_copyData16[i] = m_data8[i]; | |
| 314 } | |
| 315 | |
| 316 | |
| 317 bool StringImpl::containsOnlyWhitespace() | |
| 318 { | |
| 319 // FIXME: The definition of whitespace here includes a number of characters | |
| 320 // that are not whitespace from the point of view of RenderText; I wonder if | |
| 321 // that's a problem in practice. | |
| 322 if (is8Bit()) { | |
| 323 for (unsigned i = 0; i < m_length; ++i) { | |
| 324 UChar c = m_data8[i]; | |
| 325 if (!isASCIISpace(c)) | |
| 326 return false; | |
| 327 } | |
| 328 | |
| 329 return true; | |
| 330 } | |
| 331 | |
| 332 for (unsigned i = 0; i < m_length; ++i) { | |
| 333 UChar c = m_data16[i]; | |
| 334 if (!isASCIISpace(c)) | |
| 335 return false; | |
| 336 } | |
| 337 return true; | |
| 338 } | |
| 339 | |
| 340 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) | |
| 341 { | |
| 342 if (start >= m_length) | |
| 343 return empty(); | |
| 344 unsigned maxLength = m_length - start; | |
| 345 if (length >= maxLength) { | |
| 346 if (!start) | |
| 347 return this; | |
| 348 length = maxLength; | |
| 349 } | |
| 350 if (is8Bit()) | |
| 351 return create(m_data8 + start, length); | |
| 352 | |
| 353 return create(m_data16 + start, length); | |
| 354 } | |
| 355 | |
| 356 UChar32 StringImpl::characterStartingAt(unsigned i) | |
| 357 { | |
| 358 if (is8Bit()) | |
| 359 return m_data8[i]; | |
| 360 if (U16_IS_SINGLE(m_data16[i])) | |
| 361 return m_data16[i]; | |
| 362 if (i + 1 < m_length && U16_IS_LEAD(m_data16[i]) && U16_IS_TRAIL(m_data16[i
+ 1])) | |
| 363 return U16_GET_SUPPLEMENTARY(m_data16[i], m_data16[i + 1]); | |
| 364 return 0; | |
| 365 } | |
| 366 | |
| 367 PassRefPtr<StringImpl> StringImpl::lower() | |
| 368 { | |
| 369 // Note: This is a hot function in the Dromaeo benchmark, specifically the | |
| 370 // no-op code path up through the first 'return' statement. | |
| 371 | |
| 372 // First scan the string for uppercase and non-ASCII characters: | |
| 373 bool noUpper = true; | |
| 374 UChar ored = 0; | |
| 375 if (is8Bit()) { | |
| 376 const LChar* end = m_data8 + m_length; | |
| 377 for (const LChar* chp = m_data8; chp != end; ++chp) { | |
| 378 if (UNLIKELY(isASCIIUpper(*chp))) | |
| 379 noUpper = false; | |
| 380 ored |= *chp; | |
| 381 } | |
| 382 // Nothing to do if the string is all ASCII with no uppercase. | |
| 383 if (noUpper && !(ored & ~0x7F)) | |
| 384 return this; | |
| 385 | |
| 386 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>
::max())); | |
| 387 int32_t length = m_length; | |
| 388 | |
| 389 LChar* data8; | |
| 390 RefPtr<StringImpl> newImpl = createUninitialized(length, data8); | |
| 391 | |
| 392 if (!(ored & ~0x7F)) { | |
| 393 for (int32_t i = 0; i < length; ++i) | |
| 394 data8[i] = toASCIILower(m_data8[i]); | |
| 395 | |
| 396 return newImpl.release(); | |
| 397 } | |
| 398 | |
| 399 // Do a slower implementation for cases that include non-ASCII Latin-1 c
haracters. | |
| 400 for (int32_t i = 0; i < length; ++i) | |
| 401 data8[i] = static_cast<LChar>(Unicode::toLower(m_data8[i])); | |
| 402 | |
| 403 return newImpl.release(); | |
| 404 } | |
| 405 | |
| 406 const UChar *end = m_data16 + m_length; | |
| 407 for (const UChar* chp = m_data16; chp != end; ++chp) { | |
| 408 if (UNLIKELY(isASCIIUpper(*chp))) | |
| 409 noUpper = false; | |
| 410 ored |= *chp; | |
| 411 } | |
| 412 // Nothing to do if the string is all ASCII with no uppercase. | |
| 413 if (noUpper && !(ored & ~0x7F)) | |
| 414 return this; | |
| 415 | |
| 416 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma
x())); | |
| 417 int32_t length = m_length; | |
| 418 | |
| 419 if (!(ored & ~0x7F)) { | |
| 420 UChar* data16; | |
| 421 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
| 422 | |
| 423 for (int32_t i = 0; i < length; ++i) { | |
| 424 UChar c = m_data16[i]; | |
| 425 data16[i] = toASCIILower(c); | |
| 426 } | |
| 427 return newImpl.release(); | |
| 428 } | |
| 429 | |
| 430 // Do a slower implementation for cases that include non-ASCII characters. | |
| 431 UChar* data16; | |
| 432 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
| 433 | |
| 434 bool error; | |
| 435 int32_t realLength = Unicode::toLower(data16, length, m_data16, m_length, &e
rror); | |
| 436 if (!error && realLength == length) | |
| 437 return newImpl.release(); | |
| 438 | |
| 439 newImpl = createUninitialized(realLength, data16); | |
| 440 Unicode::toLower(data16, realLength, m_data16, m_length, &error); | |
| 441 if (error) | |
| 442 return this; | |
| 443 return newImpl.release(); | |
| 444 } | |
| 445 | |
| 446 PassRefPtr<StringImpl> StringImpl::upper() | |
| 447 { | |
| 448 // This function could be optimized for no-op cases the way lower() is, | |
| 449 // but in empirical testing, few actual calls to upper() are no-ops, so | |
| 450 // it wouldn't be worth the extra time for pre-scanning. | |
| 451 | |
| 452 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma
x())); | |
| 453 int32_t length = m_length; | |
| 454 | |
| 455 if (is8Bit()) { | |
| 456 LChar* data8; | |
| 457 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | |
| 458 | |
| 459 // Do a faster loop for the case where all the characters are ASCII. | |
| 460 LChar ored = 0; | |
| 461 for (int i = 0; i < length; ++i) { | |
| 462 LChar c = m_data8[i]; | |
| 463 ored |= c; | |
| 464 data8[i] = toASCIIUpper(c); | |
| 465 } | |
| 466 if (!(ored & ~0x7F)) | |
| 467 return newImpl.release(); | |
| 468 | |
| 469 // Do a slower implementation for cases that include non-ASCII Latin-1 c
haracters. | |
| 470 int numberSharpSCharacters = 0; | |
| 471 | |
| 472 // There are two special cases. | |
| 473 // 1. latin-1 characters when converted to upper case are 16 bit charac
ters. | |
| 474 // 2. Lower case sharp-S converts to "SS" (two characters) | |
| 475 for (int32_t i = 0; i < length; ++i) { | |
| 476 LChar c = m_data8[i]; | |
| 477 if (UNLIKELY(c == smallLetterSharpS)) | |
| 478 ++numberSharpSCharacters; | |
| 479 UChar upper = Unicode::toUpper(c); | |
| 480 if (UNLIKELY(upper > 0xff)) { | |
| 481 // Since this upper-cased character does not fit in an 8-bit str
ing, we need to take the 16-bit path. | |
| 482 goto upconvert; | |
| 483 } | |
| 484 data8[i] = static_cast<LChar>(upper); | |
| 485 } | |
| 486 | |
| 487 if (!numberSharpSCharacters) | |
| 488 return newImpl.release(); | |
| 489 | |
| 490 // We have numberSSCharacters sharp-s characters, but none of the other
special characters. | |
| 491 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); | |
| 492 | |
| 493 LChar* dest = data8; | |
| 494 | |
| 495 for (int32_t i = 0; i < length; ++i) { | |
| 496 LChar c = m_data8[i]; | |
| 497 if (c == smallLetterSharpS) { | |
| 498 *dest++ = 'S'; | |
| 499 *dest++ = 'S'; | |
| 500 } else | |
| 501 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); | |
| 502 } | |
| 503 | |
| 504 return newImpl.release(); | |
| 505 } | |
| 506 | |
| 507 upconvert: | |
| 508 const UChar* source16 = characters(); | |
| 509 | |
| 510 UChar* data16; | |
| 511 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
| 512 | |
| 513 // Do a faster loop for the case where all the characters are ASCII. | |
| 514 UChar ored = 0; | |
| 515 for (int i = 0; i < length; ++i) { | |
| 516 UChar c = source16[i]; | |
| 517 ored |= c; | |
| 518 data16[i] = toASCIIUpper(c); | |
| 519 } | |
| 520 if (!(ored & ~0x7F)) | |
| 521 return newImpl.release(); | |
| 522 | |
| 523 // Do a slower implementation for cases that include non-ASCII characters. | |
| 524 bool error; | |
| 525 newImpl = createUninitialized(m_length, data16); | |
| 526 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &e
rror); | |
| 527 if (!error && realLength == length) | |
| 528 return newImpl; | |
| 529 newImpl = createUninitialized(realLength, data16); | |
| 530 Unicode::toUpper(data16, realLength, source16, m_length, &error); | |
| 531 if (error) | |
| 532 return this; | |
| 533 return newImpl.release(); | |
| 534 } | |
| 535 | |
| 536 PassRefPtr<StringImpl> StringImpl::fill(UChar character) | |
| 537 { | |
| 538 if (!m_length) | |
| 539 return this; | |
| 540 | |
| 541 if (!(character & ~0x7F)) { | |
| 542 LChar* data; | |
| 543 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 544 for (unsigned i = 0; i < m_length; ++i) | |
| 545 data[i] = character; | |
| 546 return newImpl.release(); | |
| 547 } | |
| 548 UChar* data; | |
| 549 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 550 for (unsigned i = 0; i < m_length; ++i) | |
| 551 data[i] = character; | |
| 552 return newImpl.release(); | |
| 553 } | |
| 554 | |
| 555 PassRefPtr<StringImpl> StringImpl::foldCase() | |
| 556 { | |
| 557 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma
x())); | |
| 558 int32_t length = m_length; | |
| 559 | |
| 560 if (is8Bit()) { | |
| 561 // Do a faster loop for the case where all the characters are ASCII. | |
| 562 LChar* data; | |
| 563 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data); | |
| 564 LChar ored = 0; | |
| 565 | |
| 566 for (int32_t i = 0; i < length; ++i) { | |
| 567 LChar c = m_data8[i]; | |
| 568 data[i] = toASCIILower(c); | |
| 569 ored |= c; | |
| 570 } | |
| 571 | |
| 572 if (!(ored & ~0x7F)) | |
| 573 return newImpl.release(); | |
| 574 | |
| 575 // Do a slower implementation for cases that include non-ASCII Latin-1 c
haracters. | |
| 576 for (int32_t i = 0; i < length; ++i) | |
| 577 data[i] = static_cast<LChar>(Unicode::toLower(m_data8[i])); | |
| 578 | |
| 579 return newImpl.release(); | |
| 580 } | |
| 581 | |
| 582 // Do a faster loop for the case where all the characters are ASCII. | |
| 583 UChar* data; | |
| 584 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 585 UChar ored = 0; | |
| 586 for (int32_t i = 0; i < length; ++i) { | |
| 587 UChar c = m_data16[i]; | |
| 588 ored |= c; | |
| 589 data[i] = toASCIILower(c); | |
| 590 } | |
| 591 if (!(ored & ~0x7F)) | |
| 592 return newImpl.release(); | |
| 593 | |
| 594 // Do a slower implementation for cases that include non-ASCII characters. | |
| 595 bool error; | |
| 596 int32_t realLength = Unicode::foldCase(data, length, m_data16, m_length, &er
ror); | |
| 597 if (!error && realLength == length) | |
| 598 return newImpl.release(); | |
| 599 newImpl = createUninitialized(realLength, data); | |
| 600 Unicode::foldCase(data, realLength, m_data16, m_length, &error); | |
| 601 if (error) | |
| 602 return this; | |
| 603 return newImpl.release(); | |
| 604 } | |
| 605 | |
| 606 template <class UCharPredicate> | |
| 607 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate
predicate) | |
| 608 { | |
| 609 if (!m_length) | |
| 610 return empty(); | |
| 611 | |
| 612 unsigned start = 0; | |
| 613 unsigned end = m_length - 1; | |
| 614 | |
| 615 // skip white space from start | |
| 616 while (start <= end && predicate(is8Bit() ? m_data8[start] : m_data16[start]
)) | |
| 617 ++start; | |
| 618 | |
| 619 // only white space | |
| 620 if (start > end) | |
| 621 return empty(); | |
| 622 | |
| 623 // skip white space from end | |
| 624 while (end && predicate(is8Bit() ? m_data8[end] : m_data16[end])) | |
| 625 --end; | |
| 626 | |
| 627 if (!start && end == m_length - 1) | |
| 628 return this; | |
| 629 if (is8Bit()) | |
| 630 return create(m_data8 + start, end + 1 - start); | |
| 631 return create(m_data16 + start, end + 1 - start); | |
| 632 } | |
| 633 | |
| 634 class UCharPredicate { | |
| 635 public: | |
| 636 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(functi
on) { } | |
| 637 | |
| 638 inline bool operator()(UChar ch) const | |
| 639 { | |
| 640 return m_function(ch); | |
| 641 } | |
| 642 | |
| 643 private: | |
| 644 const CharacterMatchFunctionPtr m_function; | |
| 645 }; | |
| 646 | |
| 647 class SpaceOrNewlinePredicate { | |
| 648 public: | |
| 649 inline bool operator()(UChar ch) const | |
| 650 { | |
| 651 return isSpaceOrNewline(ch); | |
| 652 } | |
| 653 }; | |
| 654 | |
| 655 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() | |
| 656 { | |
| 657 return stripMatchedCharacters(SpaceOrNewlinePredicate()); | |
| 658 } | |
| 659 | |
| 660 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhi
teSpace) | |
| 661 { | |
| 662 return stripMatchedCharacters(UCharPredicate(isWhiteSpace)); | |
| 663 } | |
| 664 | |
| 665 template <typename CharType> | |
| 666 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType
* characters, CharacterMatchFunctionPtr findMatch) | |
| 667 { | |
| 668 const CharType* from = characters; | |
| 669 const CharType* fromend = from + m_length; | |
| 670 | |
| 671 // Assume the common case will not remove any characters | |
| 672 while (from != fromend && !findMatch(*from)) | |
| 673 ++from; | |
| 674 if (from == fromend) | |
| 675 return this; | |
| 676 | |
| 677 StringBuffer<CharType> data(m_length); | |
| 678 CharType* to = data.characters(); | |
| 679 unsigned outc = from - characters; | |
| 680 | |
| 681 if (outc) | |
| 682 memcpy(to, characters, outc * sizeof(CharType)); | |
| 683 | |
| 684 while (true) { | |
| 685 while (from != fromend && findMatch(*from)) | |
| 686 ++from; | |
| 687 while (from != fromend && !findMatch(*from)) | |
| 688 to[outc++] = *from++; | |
| 689 if (from == fromend) | |
| 690 break; | |
| 691 } | |
| 692 | |
| 693 data.shrink(outc); | |
| 694 | |
| 695 return adopt(data); | |
| 696 } | |
| 697 | |
| 698 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr fi
ndMatch) | |
| 699 { | |
| 700 if (is8Bit()) | |
| 701 return removeCharacters(characters8(), findMatch); | |
| 702 return removeCharacters(characters16(), findMatch); | |
| 703 } | |
| 704 | |
| 705 template <typename CharType, class UCharPredicate> | |
| 706 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UChar
Predicate predicate) | |
| 707 { | |
| 708 StringBuffer<CharType> data(m_length); | |
| 709 | |
| 710 const CharType* from = getCharacters<CharType>(); | |
| 711 const CharType* fromend = from + m_length; | |
| 712 int outc = 0; | |
| 713 bool changedToSpace = false; | |
| 714 | |
| 715 CharType* to = data.characters(); | |
| 716 | |
| 717 while (true) { | |
| 718 while (from != fromend && predicate(*from)) { | |
| 719 if (*from != ' ') | |
| 720 changedToSpace = true; | |
| 721 ++from; | |
| 722 } | |
| 723 while (from != fromend && !predicate(*from)) | |
| 724 to[outc++] = *from++; | |
| 725 if (from != fromend) | |
| 726 to[outc++] = ' '; | |
| 727 else | |
| 728 break; | |
| 729 } | |
| 730 | |
| 731 if (outc > 0 && to[outc - 1] == ' ') | |
| 732 --outc; | |
| 733 | |
| 734 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) | |
| 735 return this; | |
| 736 | |
| 737 data.shrink(outc); | |
| 738 | |
| 739 return adopt(data); | |
| 740 } | |
| 741 | |
| 742 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace() | |
| 743 { | |
| 744 if (is8Bit()) | |
| 745 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlin
ePredicate()); | |
| 746 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePre
dicate()); | |
| 747 } | |
| 748 | |
| 749 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr is
WhiteSpace) | |
| 750 { | |
| 751 if (is8Bit()) | |
| 752 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicat
e(isWhiteSpace)); | |
| 753 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(is
WhiteSpace)); | |
| 754 } | |
| 755 | |
| 756 int StringImpl::toIntStrict(bool* ok, int base) | |
| 757 { | |
| 758 if (is8Bit()) | |
| 759 return charactersToIntStrict(characters8(), m_length, ok, base); | |
| 760 return charactersToIntStrict(characters16(), m_length, ok, base); | |
| 761 } | |
| 762 | |
| 763 unsigned StringImpl::toUIntStrict(bool* ok, int base) | |
| 764 { | |
| 765 if (is8Bit()) | |
| 766 return charactersToUIntStrict(characters8(), m_length, ok, base); | |
| 767 return charactersToUIntStrict(characters16(), m_length, ok, base); | |
| 768 } | |
| 769 | |
| 770 int64_t StringImpl::toInt64Strict(bool* ok, int base) | |
| 771 { | |
| 772 if (is8Bit()) | |
| 773 return charactersToInt64Strict(characters8(), m_length, ok, base); | |
| 774 return charactersToInt64Strict(characters16(), m_length, ok, base); | |
| 775 } | |
| 776 | |
| 777 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) | |
| 778 { | |
| 779 if (is8Bit()) | |
| 780 return charactersToUInt64Strict(characters8(), m_length, ok, base); | |
| 781 return charactersToUInt64Strict(characters16(), m_length, ok, base); | |
| 782 } | |
| 783 | |
| 784 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base) | |
| 785 { | |
| 786 if (is8Bit()) | |
| 787 return charactersToIntPtrStrict(characters8(), m_length, ok, base); | |
| 788 return charactersToIntPtrStrict(characters16(), m_length, ok, base); | |
| 789 } | |
| 790 | |
| 791 int StringImpl::toInt(bool* ok) | |
| 792 { | |
| 793 if (is8Bit()) | |
| 794 return charactersToInt(characters8(), m_length, ok); | |
| 795 return charactersToInt(characters16(), m_length, ok); | |
| 796 } | |
| 797 | |
| 798 unsigned StringImpl::toUInt(bool* ok) | |
| 799 { | |
| 800 if (is8Bit()) | |
| 801 return charactersToUInt(characters8(), m_length, ok); | |
| 802 return charactersToUInt(characters16(), m_length, ok); | |
| 803 } | |
| 804 | |
| 805 int64_t StringImpl::toInt64(bool* ok) | |
| 806 { | |
| 807 if (is8Bit()) | |
| 808 return charactersToInt64(characters8(), m_length, ok); | |
| 809 return charactersToInt64(characters16(), m_length, ok); | |
| 810 } | |
| 811 | |
| 812 uint64_t StringImpl::toUInt64(bool* ok) | |
| 813 { | |
| 814 if (is8Bit()) | |
| 815 return charactersToUInt64(characters8(), m_length, ok); | |
| 816 return charactersToUInt64(characters16(), m_length, ok); | |
| 817 } | |
| 818 | |
| 819 intptr_t StringImpl::toIntPtr(bool* ok) | |
| 820 { | |
| 821 if (is8Bit()) | |
| 822 return charactersToIntPtr(characters8(), m_length, ok); | |
| 823 return charactersToIntPtr(characters16(), m_length, ok); | |
| 824 } | |
| 825 | |
| 826 double StringImpl::toDouble(bool* ok) | |
| 827 { | |
| 828 if (is8Bit()) | |
| 829 return charactersToDouble(characters8(), m_length, ok); | |
| 830 return charactersToDouble(characters16(), m_length, ok); | |
| 831 } | |
| 832 | |
| 833 float StringImpl::toFloat(bool* ok) | |
| 834 { | |
| 835 if (is8Bit()) | |
| 836 return charactersToFloat(characters8(), m_length, ok); | |
| 837 return charactersToFloat(characters16(), m_length, ok); | |
| 838 } | |
| 839 | |
| 840 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) | |
| 841 { | |
| 842 while (length--) { | |
| 843 LChar bc = *b++; | |
| 844 if (foldCase(*a++) != foldCase(bc)) | |
| 845 return false; | |
| 846 } | |
| 847 return true; | |
| 848 } | |
| 849 | |
| 850 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) | |
| 851 { | |
| 852 while (length--) { | |
| 853 LChar bc = *b++; | |
| 854 if (foldCase(*a++) != foldCase(bc)) | |
| 855 return false; | |
| 856 } | |
| 857 return true; | |
| 858 } | |
| 859 | |
| 860 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) | |
| 861 { | |
| 862 if (is8Bit()) | |
| 863 return WTF::find(characters8(), m_length, matchFunction, start); | |
| 864 return WTF::find(characters16(), m_length, matchFunction, start); | |
| 865 } | |
| 866 | |
| 867 size_t StringImpl::find(const LChar* matchString, unsigned index) | |
| 868 { | |
| 869 // Check for null or empty string to match against | |
| 870 if (!matchString) | |
| 871 return notFound; | |
| 872 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)
); | |
| 873 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); | |
| 874 unsigned matchLength = matchStringLength; | |
| 875 if (!matchLength) | |
| 876 return min(index, length()); | |
| 877 | |
| 878 // Optimization 1: fast case for strings of length 1. | |
| 879 if (matchLength == 1) | |
| 880 return WTF::find(characters16(), length(), *matchString, index); | |
| 881 | |
| 882 // Check index & matchLength are in range. | |
| 883 if (index > length()) | |
| 884 return notFound; | |
| 885 unsigned searchLength = length() - index; | |
| 886 if (matchLength > searchLength) | |
| 887 return notFound; | |
| 888 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 889 unsigned delta = searchLength - matchLength; | |
| 890 | |
| 891 const UChar* searchCharacters = characters() + index; | |
| 892 | |
| 893 // Optimization 2: keep a running hash of the strings, | |
| 894 // only call equal if the hashes match. | |
| 895 unsigned searchHash = 0; | |
| 896 unsigned matchHash = 0; | |
| 897 for (unsigned i = 0; i < matchLength; ++i) { | |
| 898 searchHash += searchCharacters[i]; | |
| 899 matchHash += matchString[i]; | |
| 900 } | |
| 901 | |
| 902 unsigned i = 0; | |
| 903 // keep looping until we match | |
| 904 while (searchHash != matchHash || !equal(searchCharacters + i, matchString,
matchLength)) { | |
| 905 if (i == delta) | |
| 906 return notFound; | |
| 907 searchHash += searchCharacters[i + matchLength]; | |
| 908 searchHash -= searchCharacters[i]; | |
| 909 ++i; | |
| 910 } | |
| 911 return index + i; | |
| 912 } | |
| 913 | |
| 914 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) | |
| 915 { | |
| 916 // Check for null or empty string to match against | |
| 917 if (!matchString) | |
| 918 return notFound; | |
| 919 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)
); | |
| 920 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); | |
| 921 unsigned matchLength = matchStringLength; | |
| 922 if (!matchLength) | |
| 923 return min(index, length()); | |
| 924 | |
| 925 // Check index & matchLength are in range. | |
| 926 if (index > length()) | |
| 927 return notFound; | |
| 928 unsigned searchLength = length() - index; | |
| 929 if (matchLength > searchLength) | |
| 930 return notFound; | |
| 931 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 932 unsigned delta = searchLength - matchLength; | |
| 933 | |
| 934 const UChar* searchCharacters = characters() + index; | |
| 935 | |
| 936 unsigned i = 0; | |
| 937 // keep looping until we match | |
| 938 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { | |
| 939 if (i == delta) | |
| 940 return notFound; | |
| 941 ++i; | |
| 942 } | |
| 943 return index + i; | |
| 944 } | |
| 945 | |
| 946 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 947 ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacter
s, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLen
gth, unsigned matchLength) | |
| 948 { | |
| 949 // Optimization: keep a running hash of the strings, | |
| 950 // only call equal() if the hashes match. | |
| 951 | |
| 952 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 953 unsigned delta = searchLength - matchLength; | |
| 954 | |
| 955 unsigned searchHash = 0; | |
| 956 unsigned matchHash = 0; | |
| 957 | |
| 958 for (unsigned i = 0; i < matchLength; ++i) { | |
| 959 searchHash += searchCharacters[i]; | |
| 960 matchHash += matchCharacters[i]; | |
| 961 } | |
| 962 | |
| 963 unsigned i = 0; | |
| 964 // keep looping until we match | |
| 965 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacte
rs, matchLength)) { | |
| 966 if (i == delta) | |
| 967 return notFound; | |
| 968 searchHash += searchCharacters[i + matchLength]; | |
| 969 searchHash -= searchCharacters[i]; | |
| 970 ++i; | |
| 971 } | |
| 972 return index + i; | |
| 973 } | |
| 974 | |
| 975 size_t StringImpl::find(StringImpl* matchString) | |
| 976 { | |
| 977 // Check for null string to match against | |
| 978 if (UNLIKELY(!matchString)) | |
| 979 return notFound; | |
| 980 unsigned matchLength = matchString->length(); | |
| 981 | |
| 982 // Optimization 1: fast case for strings of length 1. | |
| 983 if (matchLength == 1) { | |
| 984 if (is8Bit()) { | |
| 985 if (matchString->is8Bit()) | |
| 986 return WTF::find(characters8(), length(), matchString->character
s8()[0]); | |
| 987 return WTF::find(characters8(), length(), matchString->characters16(
)[0]); | |
| 988 } | |
| 989 if (matchString->is8Bit()) | |
| 990 return WTF::find(characters16(), length(), matchString->characters8(
)[0]); | |
| 991 return WTF::find(characters16(), length(), matchString->characters16()[0
]); | |
| 992 } | |
| 993 | |
| 994 // Check matchLength is in range. | |
| 995 if (matchLength > length()) | |
| 996 return notFound; | |
| 997 | |
| 998 // Check for empty string to match against | |
| 999 if (UNLIKELY(!matchLength)) | |
| 1000 return 0; | |
| 1001 | |
| 1002 if (is8Bit()) { | |
| 1003 if (matchString->is8Bit()) | |
| 1004 return findInner(characters8(), matchString->characters8(), 0, lengt
h(), matchLength); | |
| 1005 return findInner(characters8(), matchString->characters16(), 0, length()
, matchLength); | |
| 1006 } | |
| 1007 | |
| 1008 if (matchString->is8Bit()) | |
| 1009 return findInner(characters16(), matchString->characters8(), 0, length()
, matchLength); | |
| 1010 | |
| 1011 return findInner(characters16(), matchString->characters16(), 0, length(), m
atchLength); | |
| 1012 } | |
| 1013 | |
| 1014 size_t StringImpl::find(StringImpl* matchString, unsigned index) | |
| 1015 { | |
| 1016 // Check for null or empty string to match against | |
| 1017 if (UNLIKELY(!matchString)) | |
| 1018 return notFound; | |
| 1019 | |
| 1020 unsigned matchLength = matchString->length(); | |
| 1021 | |
| 1022 // Optimization 1: fast case for strings of length 1. | |
| 1023 if (matchLength == 1) { | |
| 1024 if (is8Bit()) | |
| 1025 return WTF::find(characters8(), length(), (*matchString)[0], index); | |
| 1026 return WTF::find(characters16(), length(), (*matchString)[0], index); | |
| 1027 } | |
| 1028 | |
| 1029 if (UNLIKELY(!matchLength)) | |
| 1030 return min(index, length()); | |
| 1031 | |
| 1032 // Check index & matchLength are in range. | |
| 1033 if (index > length()) | |
| 1034 return notFound; | |
| 1035 unsigned searchLength = length() - index; | |
| 1036 if (matchLength > searchLength) | |
| 1037 return notFound; | |
| 1038 | |
| 1039 if (is8Bit()) { | |
| 1040 if (matchString->is8Bit()) | |
| 1041 return findInner(characters8() + index, matchString->characters8(),
index, searchLength, matchLength); | |
| 1042 return findInner(characters8() + index, matchString->characters16(), ind
ex, searchLength, matchLength); | |
| 1043 } | |
| 1044 | |
| 1045 if (matchString->is8Bit()) | |
| 1046 return findInner(characters16() + index, matchString->characters8(), ind
ex, searchLength, matchLength); | |
| 1047 | |
| 1048 return findInner(characters16() + index, matchString->characters16(), index,
searchLength, matchLength); | |
| 1049 } | |
| 1050 | |
| 1051 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1052 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* sea
rchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsign
ed searchLength, unsigned matchLength) | |
| 1053 { | |
| 1054 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 1055 unsigned delta = searchLength - matchLength; | |
| 1056 | |
| 1057 unsigned i = 0; | |
| 1058 // keep looping until we match | |
| 1059 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength
)) { | |
| 1060 if (i == delta) | |
| 1061 return notFound; | |
| 1062 ++i; | |
| 1063 } | |
| 1064 return index + i; | |
| 1065 } | |
| 1066 | |
| 1067 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) | |
| 1068 { | |
| 1069 // Check for null or empty string to match against | |
| 1070 if (!matchString) | |
| 1071 return notFound; | |
| 1072 unsigned matchLength = matchString->length(); | |
| 1073 if (!matchLength) | |
| 1074 return min(index, length()); | |
| 1075 | |
| 1076 // Check index & matchLength are in range. | |
| 1077 if (index > length()) | |
| 1078 return notFound; | |
| 1079 unsigned searchLength = length() - index; | |
| 1080 if (matchLength > searchLength) | |
| 1081 return notFound; | |
| 1082 | |
| 1083 if (is8Bit()) { | |
| 1084 if (matchString->is8Bit()) | |
| 1085 return findIgnoringCaseInner(characters8() + index, matchString->cha
racters8(), index, searchLength, matchLength); | |
| 1086 return findIgnoringCaseInner(characters8() + index, matchString->charact
ers16(), index, searchLength, matchLength); | |
| 1087 } | |
| 1088 | |
| 1089 if (matchString->is8Bit()) | |
| 1090 return findIgnoringCaseInner(characters16() + index, matchString->charac
ters8(), index, searchLength, matchLength); | |
| 1091 | |
| 1092 return findIgnoringCaseInner(characters16() + index, matchString->characters
16(), index, searchLength, matchLength); | |
| 1093 } | |
| 1094 | |
| 1095 size_t StringImpl::findNextLineStart(unsigned index) | |
| 1096 { | |
| 1097 if (is8Bit()) | |
| 1098 return WTF::findNextLineStart(characters8(), m_length, index); | |
| 1099 return WTF::findNextLineStart(characters16(), m_length, index); | |
| 1100 } | |
| 1101 | |
| 1102 size_t StringImpl::reverseFind(UChar c, unsigned index) | |
| 1103 { | |
| 1104 if (is8Bit()) | |
| 1105 return WTF::reverseFind(characters8(), m_length, c, index); | |
| 1106 return WTF::reverseFind(characters16(), m_length, c, index); | |
| 1107 } | |
| 1108 | |
| 1109 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1110 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCh
aracters, const MatchCharacterType* matchCharacters, unsigned index, unsigned le
ngth, unsigned matchLength) | |
| 1111 { | |
| 1112 // Optimization: keep a running hash of the strings, | |
| 1113 // only call equal if the hashes match. | |
| 1114 | |
| 1115 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 1116 unsigned delta = min(index, length - matchLength); | |
| 1117 | |
| 1118 unsigned searchHash = 0; | |
| 1119 unsigned matchHash = 0; | |
| 1120 for (unsigned i = 0; i < matchLength; ++i) { | |
| 1121 searchHash += searchCharacters[delta + i]; | |
| 1122 matchHash += matchCharacters[i]; | |
| 1123 } | |
| 1124 | |
| 1125 // keep looping until we match | |
| 1126 while (searchHash != matchHash || !equal(searchCharacters + delta, matchChar
acters, matchLength)) { | |
| 1127 if (!delta) | |
| 1128 return notFound; | |
| 1129 --delta; | |
| 1130 searchHash -= searchCharacters[delta + matchLength]; | |
| 1131 searchHash += searchCharacters[delta]; | |
| 1132 } | |
| 1133 return delta; | |
| 1134 } | |
| 1135 | |
| 1136 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) | |
| 1137 { | |
| 1138 // Check for null or empty string to match against | |
| 1139 if (!matchString) | |
| 1140 return notFound; | |
| 1141 unsigned matchLength = matchString->length(); | |
| 1142 unsigned ourLength = length(); | |
| 1143 if (!matchLength) | |
| 1144 return min(index, ourLength); | |
| 1145 | |
| 1146 // Optimization 1: fast case for strings of length 1. | |
| 1147 if (matchLength == 1) { | |
| 1148 if (is8Bit()) | |
| 1149 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0],
index); | |
| 1150 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], in
dex); | |
| 1151 } | |
| 1152 | |
| 1153 // Check index & matchLength are in range. | |
| 1154 if (matchLength > ourLength) | |
| 1155 return notFound; | |
| 1156 | |
| 1157 if (is8Bit()) { | |
| 1158 if (matchString->is8Bit()) | |
| 1159 return reverseFindInner(characters8(), matchString->characters8(), i
ndex, ourLength, matchLength); | |
| 1160 return reverseFindInner(characters8(), matchString->characters16(), inde
x, ourLength, matchLength); | |
| 1161 } | |
| 1162 | |
| 1163 if (matchString->is8Bit()) | |
| 1164 return reverseFindInner(characters16(), matchString->characters8(), inde
x, ourLength, matchLength); | |
| 1165 | |
| 1166 return reverseFindInner(characters16(), matchString->characters16(), index,
ourLength, matchLength); | |
| 1167 } | |
| 1168 | |
| 1169 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1170 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterTy
pe* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index,
unsigned length, unsigned matchLength) | |
| 1171 { | |
| 1172 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 1173 unsigned delta = min(index, length - matchLength); | |
| 1174 | |
| 1175 // keep looping until we match | |
| 1176 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLe
ngth)) { | |
| 1177 if (!delta) | |
| 1178 return notFound; | |
| 1179 --delta; | |
| 1180 } | |
| 1181 return delta; | |
| 1182 } | |
| 1183 | |
| 1184 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned ind
ex) | |
| 1185 { | |
| 1186 // Check for null or empty string to match against | |
| 1187 if (!matchString) | |
| 1188 return notFound; | |
| 1189 unsigned matchLength = matchString->length(); | |
| 1190 unsigned ourLength = length(); | |
| 1191 if (!matchLength) | |
| 1192 return min(index, ourLength); | |
| 1193 | |
| 1194 // Check index & matchLength are in range. | |
| 1195 if (matchLength > ourLength) | |
| 1196 return notFound; | |
| 1197 | |
| 1198 if (is8Bit()) { | |
| 1199 if (matchString->is8Bit()) | |
| 1200 return reverseFindIgnoringCaseInner(characters8(), matchString->char
acters8(), index, ourLength, matchLength); | |
| 1201 return reverseFindIgnoringCaseInner(characters8(), matchString->characte
rs16(), index, ourLength, matchLength); | |
| 1202 } | |
| 1203 | |
| 1204 if (matchString->is8Bit()) | |
| 1205 return reverseFindIgnoringCaseInner(characters16(), matchString->charact
ers8(), index, ourLength, matchLength); | |
| 1206 | |
| 1207 return reverseFindIgnoringCaseInner(characters16(), matchString->characters1
6(), index, ourLength, matchLength); | |
| 1208 } | |
| 1209 | |
| 1210 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned star
tOffset, const char* matchString, unsigned matchLength, bool caseSensitive) | |
| 1211 { | |
| 1212 ASSERT(stringImpl); | |
| 1213 ASSERT(matchLength <= stringImpl->length()); | |
| 1214 ASSERT(startOffset + matchLength <= stringImpl->length()); | |
| 1215 | |
| 1216 if (caseSensitive) { | |
| 1217 if (stringImpl->is8Bit()) | |
| 1218 return equal(stringImpl->characters8() + startOffset, reinterpret_ca
st<const LChar*>(matchString), matchLength); | |
| 1219 return equal(stringImpl->characters16() + startOffset, reinterpret_cast<
const LChar*>(matchString), matchLength); | |
| 1220 } | |
| 1221 if (stringImpl->is8Bit()) | |
| 1222 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinte
rpret_cast<const LChar*>(matchString), matchLength); | |
| 1223 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpr
et_cast<const LChar*>(matchString), matchLength); | |
| 1224 } | |
| 1225 | |
| 1226 bool StringImpl::startsWith(UChar character) const | |
| 1227 { | |
| 1228 return m_length && (*this)[0] == character; | |
| 1229 } | |
| 1230 | |
| 1231 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool
caseSensitive) const | |
| 1232 { | |
| 1233 ASSERT(matchLength); | |
| 1234 if (matchLength > length()) | |
| 1235 return false; | |
| 1236 return equalInner(this, 0, matchString, matchLength, caseSensitive); | |
| 1237 } | |
| 1238 | |
| 1239 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive) | |
| 1240 { | |
| 1241 ASSERT(matchString); | |
| 1242 if (m_length >= matchString->m_length) { | |
| 1243 unsigned start = m_length - matchString->m_length; | |
| 1244 return (caseSensitive ? find(matchString, start) : findIgnoringCase(matc
hString, start)) == start; | |
| 1245 } | |
| 1246 return false; | |
| 1247 } | |
| 1248 | |
| 1249 bool StringImpl::endsWith(UChar character) const | |
| 1250 { | |
| 1251 return m_length && (*this)[m_length - 1] == character; | |
| 1252 } | |
| 1253 | |
| 1254 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool ca
seSensitive) const | |
| 1255 { | |
| 1256 ASSERT(matchLength); | |
| 1257 if (matchLength > length()) | |
| 1258 return false; | |
| 1259 unsigned startOffset = length() - matchLength; | |
| 1260 return equalInner(this, startOffset, matchString, matchLength, caseSensitive
); | |
| 1261 } | |
| 1262 | |
| 1263 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) | |
| 1264 { | |
| 1265 if (oldC == newC) | |
| 1266 return this; | |
| 1267 unsigned i; | |
| 1268 for (i = 0; i != m_length; ++i) { | |
| 1269 UChar c = is8Bit() ? m_data8[i] : m_data16[i]; | |
| 1270 if (c == oldC) | |
| 1271 break; | |
| 1272 } | |
| 1273 if (i == m_length) | |
| 1274 return this; | |
| 1275 | |
| 1276 if (is8Bit()) { | |
| 1277 if (oldC > 0xff) | |
| 1278 // Looking for a 16 bit char in an 8 bit string, we're done. | |
| 1279 return this; | |
| 1280 | |
| 1281 if (newC <= 0xff) { | |
| 1282 LChar* data; | |
| 1283 LChar oldChar = static_cast<LChar>(oldC); | |
| 1284 LChar newChar = static_cast<LChar>(newC); | |
| 1285 | |
| 1286 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 1287 | |
| 1288 for (i = 0; i != m_length; ++i) { | |
| 1289 LChar ch = m_data8[i]; | |
| 1290 if (ch == oldChar) | |
| 1291 ch = newChar; | |
| 1292 data[i] = ch; | |
| 1293 } | |
| 1294 return newImpl.release(); | |
| 1295 } | |
| 1296 | |
| 1297 // There is the possibility we need to up convert from 8 to 16 bit, | |
| 1298 // create a 16 bit string for the result. | |
| 1299 UChar* data; | |
| 1300 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 1301 | |
| 1302 for (i = 0; i != m_length; ++i) { | |
| 1303 UChar ch = m_data8[i]; | |
| 1304 if (ch == oldC) | |
| 1305 ch = newC; | |
| 1306 data[i] = ch; | |
| 1307 } | |
| 1308 | |
| 1309 return newImpl.release(); | |
| 1310 } | |
| 1311 | |
| 1312 UChar* data; | |
| 1313 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 1314 | |
| 1315 for (i = 0; i != m_length; ++i) { | |
| 1316 UChar ch = m_data16[i]; | |
| 1317 if (ch == oldC) | |
| 1318 ch = newC; | |
| 1319 data[i] = ch; | |
| 1320 } | |
| 1321 return newImpl.release(); | |
| 1322 } | |
| 1323 | |
| 1324 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToR
eplace, StringImpl* str) | |
| 1325 { | |
| 1326 position = min(position, length()); | |
| 1327 lengthToReplace = min(lengthToReplace, length() - position); | |
| 1328 unsigned lengthToInsert = str ? str->length() : 0; | |
| 1329 if (!lengthToReplace && !lengthToInsert) | |
| 1330 return this; | |
| 1331 | |
| 1332 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max
() - lengthToInsert)); | |
| 1333 | |
| 1334 if (is8Bit() && (!str || str->is8Bit())) { | |
| 1335 LChar* data; | |
| 1336 RefPtr<StringImpl> newImpl = | |
| 1337 createUninitialized(length() - lengthToReplace + lengthToInsert, data); | |
| 1338 memcpy(data, m_data8, position * sizeof(LChar)); | |
| 1339 if (str) | |
| 1340 memcpy(data + position, str->m_data8, lengthToInsert * sizeof(LChar)
); | |
| 1341 memcpy(data + position + lengthToInsert, m_data8 + position + lengthToRe
place, | |
| 1342 (length() - position - lengthToReplace) * sizeof(LChar)); | |
| 1343 return newImpl.release(); | |
| 1344 } | |
| 1345 UChar* data; | |
| 1346 RefPtr<StringImpl> newImpl = | |
| 1347 createUninitialized(length() - lengthToReplace + lengthToInsert, data); | |
| 1348 if (is8Bit()) | |
| 1349 for (unsigned i = 0; i < position; ++i) | |
| 1350 data[i] = m_data8[i]; | |
| 1351 else | |
| 1352 memcpy(data, m_data16, position * sizeof(UChar)); | |
| 1353 if (str) { | |
| 1354 if (str->is8Bit()) | |
| 1355 for (unsigned i = 0; i < lengthToInsert; ++i) | |
| 1356 data[i + position] = str->m_data8[i]; | |
| 1357 else | |
| 1358 memcpy(data + position, str->m_data16, lengthToInsert * sizeof(UChar
)); | |
| 1359 } | |
| 1360 if (is8Bit()) { | |
| 1361 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i) | |
| 1362 data[i + position + lengthToInsert] = m_data8[i + position + lengthT
oReplace]; | |
| 1363 } else { | |
| 1364 memcpy(data + position + lengthToInsert, characters() + position + lengt
hToReplace, | |
| 1365 (length() - position - lengthToReplace) * sizeof(UChar)); | |
| 1366 } | |
| 1367 return newImpl.release(); | |
| 1368 } | |
| 1369 | |
| 1370 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen
t) | |
| 1371 { | |
| 1372 if (!replacement) | |
| 1373 return this; | |
| 1374 | |
| 1375 if (replacement->is8Bit()) | |
| 1376 return replace(pattern, replacement->m_data8, replacement->length()); | |
| 1377 | |
| 1378 return replace(pattern, replacement->m_data16, replacement->length()); | |
| 1379 } | |
| 1380 | |
| 1381 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replaceme
nt, unsigned repStrLength) | |
| 1382 { | |
| 1383 ASSERT(replacement); | |
| 1384 | |
| 1385 size_t srcSegmentStart = 0; | |
| 1386 unsigned matchCount = 0; | |
| 1387 | |
| 1388 // Count the matches. | |
| 1389 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { | |
| 1390 ++matchCount; | |
| 1391 ++srcSegmentStart; | |
| 1392 } | |
| 1393 | |
| 1394 // If we have 0 matches then we don't have to do any more work. | |
| 1395 if (!matchCount) | |
| 1396 return this; | |
| 1397 | |
| 1398 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max(
) / repStrLength); | |
| 1399 | |
| 1400 unsigned replaceSize = matchCount * repStrLength; | |
| 1401 unsigned newSize = m_length - matchCount; | |
| 1402 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); | |
| 1403 | |
| 1404 newSize += replaceSize; | |
| 1405 | |
| 1406 // Construct the new data. | |
| 1407 size_t srcSegmentEnd; | |
| 1408 unsigned srcSegmentLength; | |
| 1409 srcSegmentStart = 0; | |
| 1410 unsigned dstOffset = 0; | |
| 1411 | |
| 1412 if (is8Bit()) { | |
| 1413 LChar* data; | |
| 1414 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 1415 | |
| 1416 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { | |
| 1417 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 1418 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength
* sizeof(LChar)); | |
| 1419 dstOffset += srcSegmentLength; | |
| 1420 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar)); | |
| 1421 dstOffset += repStrLength; | |
| 1422 srcSegmentStart = srcSegmentEnd + 1; | |
| 1423 } | |
| 1424 | |
| 1425 srcSegmentLength = m_length - srcSegmentStart; | |
| 1426 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * s
izeof(LChar)); | |
| 1427 | |
| 1428 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
| 1429 | |
| 1430 return newImpl.release(); | |
| 1431 } | |
| 1432 | |
| 1433 UChar* data; | |
| 1434 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 1435 | |
| 1436 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { | |
| 1437 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 1438 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength *
sizeof(UChar)); | |
| 1439 | |
| 1440 dstOffset += srcSegmentLength; | |
| 1441 for (unsigned i = 0; i < repStrLength; ++i) | |
| 1442 data[i + dstOffset] = replacement[i]; | |
| 1443 | |
| 1444 dstOffset += repStrLength; | |
| 1445 srcSegmentStart = srcSegmentEnd + 1; | |
| 1446 } | |
| 1447 | |
| 1448 srcSegmentLength = m_length - srcSegmentStart; | |
| 1449 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * size
of(UChar)); | |
| 1450 | |
| 1451 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
| 1452 | |
| 1453 return newImpl.release(); | |
| 1454 } | |
| 1455 | |
| 1456 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replaceme
nt, unsigned repStrLength) | |
| 1457 { | |
| 1458 ASSERT(replacement); | |
| 1459 | |
| 1460 size_t srcSegmentStart = 0; | |
| 1461 unsigned matchCount = 0; | |
| 1462 | |
| 1463 // Count the matches. | |
| 1464 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { | |
| 1465 ++matchCount; | |
| 1466 ++srcSegmentStart; | |
| 1467 } | |
| 1468 | |
| 1469 // If we have 0 matches then we don't have to do any more work. | |
| 1470 if (!matchCount) | |
| 1471 return this; | |
| 1472 | |
| 1473 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max(
) / repStrLength); | |
| 1474 | |
| 1475 unsigned replaceSize = matchCount * repStrLength; | |
| 1476 unsigned newSize = m_length - matchCount; | |
| 1477 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); | |
| 1478 | |
| 1479 newSize += replaceSize; | |
| 1480 | |
| 1481 // Construct the new data. | |
| 1482 size_t srcSegmentEnd; | |
| 1483 unsigned srcSegmentLength; | |
| 1484 srcSegmentStart = 0; | |
| 1485 unsigned dstOffset = 0; | |
| 1486 | |
| 1487 if (is8Bit()) { | |
| 1488 UChar* data; | |
| 1489 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 1490 | |
| 1491 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { | |
| 1492 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 1493 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
| 1494 data[i + dstOffset] = m_data8[i + srcSegmentStart]; | |
| 1495 | |
| 1496 dstOffset += srcSegmentLength; | |
| 1497 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); | |
| 1498 | |
| 1499 dstOffset += repStrLength; | |
| 1500 srcSegmentStart = srcSegmentEnd + 1; | |
| 1501 } | |
| 1502 | |
| 1503 srcSegmentLength = m_length - srcSegmentStart; | |
| 1504 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
| 1505 data[i + dstOffset] = m_data8[i + srcSegmentStart]; | |
| 1506 | |
| 1507 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
| 1508 | |
| 1509 return newImpl.release(); | |
| 1510 } | |
| 1511 | |
| 1512 UChar* data; | |
| 1513 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 1514 | |
| 1515 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { | |
| 1516 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 1517 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength *
sizeof(UChar)); | |
| 1518 | |
| 1519 dstOffset += srcSegmentLength; | |
| 1520 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); | |
| 1521 | |
| 1522 dstOffset += repStrLength; | |
| 1523 srcSegmentStart = srcSegmentEnd + 1; | |
| 1524 } | |
| 1525 | |
| 1526 srcSegmentLength = m_length - srcSegmentStart; | |
| 1527 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * size
of(UChar)); | |
| 1528 | |
| 1529 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
| 1530 | |
| 1531 return newImpl.release(); | |
| 1532 } | |
| 1533 | |
| 1534 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl
acement) | |
| 1535 { | |
| 1536 if (!pattern || !replacement) | |
| 1537 return this; | |
| 1538 | |
| 1539 unsigned patternLength = pattern->length(); | |
| 1540 if (!patternLength) | |
| 1541 return this; | |
| 1542 | |
| 1543 unsigned repStrLength = replacement->length(); | |
| 1544 size_t srcSegmentStart = 0; | |
| 1545 unsigned matchCount = 0; | |
| 1546 | |
| 1547 // Count the matches. | |
| 1548 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { | |
| 1549 ++matchCount; | |
| 1550 srcSegmentStart += patternLength; | |
| 1551 } | |
| 1552 | |
| 1553 // If we have 0 matches, we don't have to do any more work | |
| 1554 if (!matchCount) | |
| 1555 return this; | |
| 1556 | |
| 1557 unsigned newSize = m_length - matchCount * patternLength; | |
| 1558 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max(
) / repStrLength); | |
| 1559 | |
| 1560 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * re
pStrLength)); | |
| 1561 | |
| 1562 newSize += matchCount * repStrLength; | |
| 1563 | |
| 1564 | |
| 1565 // Construct the new data | |
| 1566 size_t srcSegmentEnd; | |
| 1567 unsigned srcSegmentLength; | |
| 1568 srcSegmentStart = 0; | |
| 1569 unsigned dstOffset = 0; | |
| 1570 bool srcIs8Bit = is8Bit(); | |
| 1571 bool replacementIs8Bit = replacement->is8Bit(); | |
| 1572 | |
| 1573 // There are 4 cases: | |
| 1574 // 1. This and replacement are both 8 bit. | |
| 1575 // 2. This and replacement are both 16 bit. | |
| 1576 // 3. This is 8 bit and replacement is 16 bit. | |
| 1577 // 4. This is 16 bit and replacement is 8 bit. | |
| 1578 if (srcIs8Bit && replacementIs8Bit) { | |
| 1579 // Case 1 | |
| 1580 LChar* data; | |
| 1581 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 1582 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { | |
| 1583 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 1584 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength
* sizeof(LChar)); | |
| 1585 dstOffset += srcSegmentLength; | |
| 1586 memcpy(data + dstOffset, replacement->m_data8, repStrLength * sizeof
(LChar)); | |
| 1587 dstOffset += repStrLength; | |
| 1588 srcSegmentStart = srcSegmentEnd + patternLength; | |
| 1589 } | |
| 1590 | |
| 1591 srcSegmentLength = m_length - srcSegmentStart; | |
| 1592 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * s
izeof(LChar)); | |
| 1593 | |
| 1594 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
| 1595 | |
| 1596 return newImpl.release(); | |
| 1597 } | |
| 1598 | |
| 1599 UChar* data; | |
| 1600 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 1601 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { | |
| 1602 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 1603 if (srcIs8Bit) { | |
| 1604 // Case 3. | |
| 1605 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
| 1606 data[i + dstOffset] = m_data8[i + srcSegmentStart]; | |
| 1607 } else { | |
| 1608 // Case 2 & 4. | |
| 1609 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLengt
h * sizeof(UChar)); | |
| 1610 } | |
| 1611 dstOffset += srcSegmentLength; | |
| 1612 if (replacementIs8Bit) { | |
| 1613 // Cases 2 & 3. | |
| 1614 for (unsigned i = 0; i < repStrLength; ++i) | |
| 1615 data[i + dstOffset] = replacement->m_data8[i]; | |
| 1616 } else { | |
| 1617 // Case 4 | |
| 1618 memcpy(data + dstOffset, replacement->m_data16, repStrLength * sizeo
f(UChar)); | |
| 1619 } | |
| 1620 dstOffset += repStrLength; | |
| 1621 srcSegmentStart = srcSegmentEnd + patternLength; | |
| 1622 } | |
| 1623 | |
| 1624 srcSegmentLength = m_length - srcSegmentStart; | |
| 1625 if (srcIs8Bit) { | |
| 1626 // Case 3. | |
| 1627 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
| 1628 data[i + dstOffset] = m_data8[i + srcSegmentStart]; | |
| 1629 } else { | |
| 1630 // Cases 2 & 4. | |
| 1631 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength *
sizeof(UChar)); | |
| 1632 } | |
| 1633 | |
| 1634 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
| 1635 | |
| 1636 return newImpl.release(); | |
| 1637 } | |
| 1638 | |
| 1639 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl*
b) | |
| 1640 { | |
| 1641 unsigned aLength = a->length(); | |
| 1642 unsigned bLength = b->length(); | |
| 1643 if (aLength != bLength) | |
| 1644 return false; | |
| 1645 | |
| 1646 if (a->is8Bit()) { | |
| 1647 if (b->is8Bit()) | |
| 1648 return equal(a->characters8(), b->characters8(), aLength); | |
| 1649 | |
| 1650 return equal(a->characters8(), b->characters16(), aLength); | |
| 1651 } | |
| 1652 | |
| 1653 if (b->is8Bit()) | |
| 1654 return equal(a->characters16(), b->characters8(), aLength); | |
| 1655 | |
| 1656 return equal(a->characters16(), b->characters16(), aLength); | |
| 1657 } | |
| 1658 | |
| 1659 bool equal(const StringImpl* a, const StringImpl* b) | |
| 1660 { | |
| 1661 if (a == b) | |
| 1662 return true; | |
| 1663 if (!a || !b) | |
| 1664 return false; | |
| 1665 | |
| 1666 return stringImplContentEqual(a, b); | |
| 1667 } | |
| 1668 | |
| 1669 bool equal(const StringImpl* a, const LChar* b, unsigned length) | |
| 1670 { | |
| 1671 if (!a) | |
| 1672 return !b; | |
| 1673 if (!b) | |
| 1674 return !a; | |
| 1675 | |
| 1676 if (length != a->length()) | |
| 1677 return false; | |
| 1678 | |
| 1679 if (a->is8Bit()) | |
| 1680 return equal(a->characters8(), b, length); | |
| 1681 return equal(a->characters16(), b, length); | |
| 1682 } | |
| 1683 | |
| 1684 bool equal(const StringImpl* a, const LChar* b) | |
| 1685 { | |
| 1686 if (!a) | |
| 1687 return !b; | |
| 1688 if (!b) | |
| 1689 return !a; | |
| 1690 | |
| 1691 unsigned length = a->length(); | |
| 1692 | |
| 1693 if (a->is8Bit()) { | |
| 1694 const LChar* aPtr = a->characters8(); | |
| 1695 for (unsigned i = 0; i != length; ++i) { | |
| 1696 LChar bc = b[i]; | |
| 1697 LChar ac = aPtr[i]; | |
| 1698 if (!bc) | |
| 1699 return false; | |
| 1700 if (ac != bc) | |
| 1701 return false; | |
| 1702 } | |
| 1703 | |
| 1704 return !b[length]; | |
| 1705 } | |
| 1706 | |
| 1707 const UChar* aPtr = a->characters16(); | |
| 1708 for (unsigned i = 0; i != length; ++i) { | |
| 1709 LChar bc = b[i]; | |
| 1710 if (!bc) | |
| 1711 return false; | |
| 1712 if (aPtr[i] != bc) | |
| 1713 return false; | |
| 1714 } | |
| 1715 | |
| 1716 return !b[length]; | |
| 1717 } | |
| 1718 | |
| 1719 bool equal(const StringImpl* a, const UChar* b, unsigned length) | |
| 1720 { | |
| 1721 if (!a) | |
| 1722 return !b; | |
| 1723 if (!b) | |
| 1724 return false; | |
| 1725 | |
| 1726 if (a->length() != length) | |
| 1727 return false; | |
| 1728 if (a->is8Bit()) | |
| 1729 return equal(a->characters8(), b, length); | |
| 1730 return equal(a->characters16(), b, length); | |
| 1731 } | |
| 1732 | |
| 1733 bool equalNonNull(const StringImpl* a, const StringImpl* b) | |
| 1734 { | |
| 1735 ASSERT(a && b); | |
| 1736 if (a == b) | |
| 1737 return true; | |
| 1738 | |
| 1739 return stringImplContentEqual(a, b); | |
| 1740 } | |
| 1741 | |
| 1742 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b) | |
| 1743 { | |
| 1744 if (a == b) | |
| 1745 return true; | |
| 1746 if (!a || !b) | |
| 1747 return false; | |
| 1748 | |
| 1749 return CaseFoldingHash::equal(a, b); | |
| 1750 } | |
| 1751 | |
| 1752 bool equalIgnoringCase(const StringImpl* a, const LChar* b) | |
| 1753 { | |
| 1754 if (!a) | |
| 1755 return !b; | |
| 1756 if (!b) | |
| 1757 return !a; | |
| 1758 | |
| 1759 unsigned length = a->length(); | |
| 1760 | |
| 1761 // Do a faster loop for the case where all the characters are ASCII. | |
| 1762 UChar ored = 0; | |
| 1763 bool equal = true; | |
| 1764 if (a->is8Bit()) { | |
| 1765 const LChar* as = a->characters8(); | |
| 1766 for (unsigned i = 0; i != length; ++i) { | |
| 1767 LChar bc = b[i]; | |
| 1768 if (!bc) | |
| 1769 return false; | |
| 1770 UChar ac = as[i]; | |
| 1771 ored |= ac; | |
| 1772 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); | |
| 1773 } | |
| 1774 | |
| 1775 // Do a slower implementation for cases that include non-ASCII character
s. | |
| 1776 if (ored & ~0x7F) { | |
| 1777 equal = true; | |
| 1778 for (unsigned i = 0; i != length; ++i) | |
| 1779 equal = equal && (foldCase(as[i]) == foldCase(b[i])); | |
| 1780 } | |
| 1781 | |
| 1782 return equal && !b[length]; | |
| 1783 } | |
| 1784 | |
| 1785 const UChar* as = a->characters16(); | |
| 1786 for (unsigned i = 0; i != length; ++i) { | |
| 1787 LChar bc = b[i]; | |
| 1788 if (!bc) | |
| 1789 return false; | |
| 1790 UChar ac = as[i]; | |
| 1791 ored |= ac; | |
| 1792 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); | |
| 1793 } | |
| 1794 | |
| 1795 // Do a slower implementation for cases that include non-ASCII characters. | |
| 1796 if (ored & ~0x7F) { | |
| 1797 equal = true; | |
| 1798 for (unsigned i = 0; i != length; ++i) { | |
| 1799 equal = equal && (foldCase(as[i]) == foldCase(b[i])); | |
| 1800 } | |
| 1801 } | |
| 1802 | |
| 1803 return equal && !b[length]; | |
| 1804 } | |
| 1805 | |
| 1806 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) | |
| 1807 { | |
| 1808 ASSERT(a && b); | |
| 1809 if (a == b) | |
| 1810 return true; | |
| 1811 | |
| 1812 unsigned length = a->length(); | |
| 1813 if (length != b->length()) | |
| 1814 return false; | |
| 1815 | |
| 1816 if (a->is8Bit()) { | |
| 1817 if (b->is8Bit()) | |
| 1818 return equalIgnoringCase(a->characters8(), b->characters8(), length)
; | |
| 1819 | |
| 1820 return equalIgnoringCase(b->characters16(), a->characters8(), length); | |
| 1821 } | |
| 1822 | |
| 1823 if (b->is8Bit()) | |
| 1824 return equalIgnoringCase(a->characters16(), b->characters8(), length); | |
| 1825 | |
| 1826 return equalIgnoringCase(a->characters16(), b->characters16(), length); | |
| 1827 } | |
| 1828 | |
| 1829 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) | |
| 1830 { | |
| 1831 if (!a && b && !b->length()) | |
| 1832 return true; | |
| 1833 if (!b && a && !a->length()) | |
| 1834 return true; | |
| 1835 return equal(a, b); | |
| 1836 } | |
| 1837 | |
| 1838 WTF::Unicode::Direction StringImpl::defaultWritingDirection(bool* hasStrongDirec
tionality) | |
| 1839 { | |
| 1840 for (unsigned i = 0; i < m_length; ++i) { | |
| 1841 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(is8Bit()
? m_data8[i] : m_data16[i]); | |
| 1842 if (charDirection == WTF::Unicode::LeftToRight) { | |
| 1843 if (hasStrongDirectionality) | |
| 1844 *hasStrongDirectionality = true; | |
| 1845 return WTF::Unicode::LeftToRight; | |
| 1846 } | |
| 1847 if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::
Unicode::RightToLeftArabic) { | |
| 1848 if (hasStrongDirectionality) | |
| 1849 *hasStrongDirectionality = true; | |
| 1850 return WTF::Unicode::RightToLeft; | |
| 1851 } | |
| 1852 } | |
| 1853 if (hasStrongDirectionality) | |
| 1854 *hasStrongDirectionality = false; | |
| 1855 return WTF::Unicode::LeftToRight; | |
| 1856 } | |
| 1857 | |
| 1858 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer<LChar>& buffer) | |
| 1859 { | |
| 1860 unsigned length = buffer.length(); | |
| 1861 if (!length) | |
| 1862 return empty(); | |
| 1863 return adoptRef(new StringImpl(buffer.release(), length)); | |
| 1864 } | |
| 1865 | |
| 1866 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer<UChar>& buffer) | |
| 1867 { | |
| 1868 unsigned length = buffer.length(); | |
| 1869 if (!length) | |
| 1870 return empty(); | |
| 1871 return adoptRef(new StringImpl(buffer.release(), length)); | |
| 1872 } | |
| 1873 | |
| 1874 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const Stri
ngImpl& string) | |
| 1875 { | |
| 1876 // Use createUninitialized instead of 'new StringImpl' so that the string an
d its buffer | |
| 1877 // get allocated in a single memory block. | |
| 1878 unsigned length = string.m_length; | |
| 1879 RELEASE_ASSERT(length < numeric_limits<unsigned>::max()); | |
| 1880 RefPtr<StringImpl> terminatedString; | |
| 1881 if (string.is8Bit()) { | |
| 1882 LChar* data; | |
| 1883 terminatedString = createUninitialized(length + 1, data); | |
| 1884 memcpy(data, string.m_data8, length * sizeof(LChar)); | |
| 1885 data[length] = 0; | |
| 1886 } else { | |
| 1887 UChar* data; | |
| 1888 terminatedString = createUninitialized(length + 1, data); | |
| 1889 memcpy(data, string.m_data16, length * sizeof(UChar)); | |
| 1890 data[length] = 0; | |
| 1891 } | |
| 1892 --(terminatedString->m_length); | |
| 1893 terminatedString->m_hashAndFlags = (string.m_hashAndFlags & (~s_flagMask | s
_hashFlag8BitBuffer)) | s_hashFlagHasTerminatingNullCharacter; | |
| 1894 return terminatedString.release(); | |
| 1895 } | |
| 1896 | |
| 1897 size_t StringImpl::sizeInBytes() const | |
| 1898 { | |
| 1899 // FIXME: support substrings | |
| 1900 size_t size = length(); | |
| 1901 if (is8Bit()) { | |
| 1902 if (has16BitShadow()) { | |
| 1903 size += 2 * size; | |
| 1904 if (hasTerminatingNullCharacter()) | |
| 1905 size += 2; | |
| 1906 } | |
| 1907 } else | |
| 1908 size *= 2; | |
| 1909 return size + sizeof(*this); | |
| 1910 } | |
| 1911 | |
| 1912 } // namespace WTF | |
| OLD | NEW |