third_party/WebKit/Source/wtf/text/StringImpl.cpp - Issue 2764243002: Move files in wtf/ to platform/wtf/ (Part 9).

Side by Side Diff: third_party/WebKit/Source/wtf/text/StringImpl.cpp

Issue 2764243002: Move files in wtf/ to platform/wtf/ (Part 9). (Closed)

Patch Set: Rebase. Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 /*

2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)

3 * (C) 1999 Antti Koivisto (koivisto@kde.org)

4 * (C) 2001 Dirk Mueller ( mueller@kde.org )

5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All

6 * rights reserved.

7 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)

8 *

9 * This library is free software; you can redistribute it and/or

10 * modify it under the terms of the GNU Library General Public

11 * License as published by the Free Software Foundation; either

12 * version 2 of the License, or (at your option) any later version.

13 *

14 * This library is distributed in the hope that it will be useful,

15 * but WITHOUT ANY WARRANTY; without even the implied warranty of

16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

17 * Library General Public License for more details.

18 *

19 * You should have received a copy of the GNU Library General Public License

20 * along with this library; see the file COPYING.LIB. If not, write to

21 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,

22 * Boston, MA 02110-1301, USA.

23 *

24 */

25

26 #include "wtf/text/StringImpl.h"

27

28 #include "wtf/DynamicAnnotations.h"

29 #include "wtf/LeakAnnotations.h"

30 #include "wtf/PtrUtil.h"

31 #include "wtf/StaticConstructors.h"

32 #include "wtf/StdLibExtras.h"

33 #include "wtf/allocator/Partitions.h"

34 #include "wtf/text/AtomicString.h"

35 #include "wtf/text/AtomicStringTable.h"

36 #include "wtf/text/CString.h"

37 #include "wtf/text/CharacterNames.h"

38 #include "wtf/text/StringBuffer.h"

39 #include "wtf/text/StringHash.h"

40 #include "wtf/text/StringToNumber.h"

41 #include <algorithm>

42 #include <memory>

43

44 #ifdef STRING_STATS

45 #include "wtf/DataLog.h"

46 #include "wtf/HashMap.h"

47 #include "wtf/HashSet.h"

48 #include "wtf/RefCounted.h"

49 #include "wtf/ThreadingPrimitives.h"

50 #include <unistd.h>

51 #endif

52

53 using namespace std;

54

55 namespace WTF {

56

57 using namespace Unicode;

58

59 // As of Jan 2017, StringImpl needs 2 * sizeof(int) + 29 bits of data, and

60 // sizeof(ThreadRestrictionVerifier) is 16 bytes. Thus, in DCHECK mode the

61 // class may be padded to 32 bytes.

62 #if DCHECK_IS_ON()

63 static_assert(sizeof(StringImpl) <= 8 * sizeof(int),

64 "StringImpl should stay small");

65 #else

66 static_assert(sizeof(StringImpl) <= 3 * sizeof(int),

67 "StringImpl should stay small");

68 #endif

69

70 #ifdef STRING_STATS

71

72 static Mutex& statsMutex() {

73 DEFINE_STATIC_LOCAL(Mutex, mutex, ());

74 return mutex;

75 }

76

77 static HashSet<void*>& liveStrings() {

78 // Notice that we can't use HashSet<StringImpl*> because then HashSet would

79 // dedup identical strings.

80 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());

81 return strings;

82 }

83

84 void addStringForStats(StringImpl* string) {

85 MutexLocker locker(statsMutex());

86 liveStrings().add(string);

87 }

88

89 void removeStringForStats(StringImpl* string) {

90 MutexLocker locker(statsMutex());

91 liveStrings().remove(string);

92 }

93

94 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) {

95 const unsigned kMaxSnippetLength = 64;

96 snippet.clear();

97

98 size_t expectedLength = std::min(string->length(), kMaxSnippetLength);

99 if (expectedLength == kMaxSnippetLength)

100 expectedLength += 3; // For the "...".

101 ++expectedLength; // For the terminating '\0'.

102 snippet.reserveCapacity(expectedLength);

103

104 size_t i;

105 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {

106 UChar c = (*string)[i];

107 if (isASCIIPrintable(c))

108 snippet.append(c);

109 else

110 snippet.append('?');

111 }

112 if (i < string->length()) {

113 snippet.append('.');

114 snippet.append('.');

115 snippet.append('.');

116 }

117 snippet.append('\0');

118 }

119

120 static bool isUnnecessarilyWide(const StringImpl* string) {

121 if (string->is8Bit())

122 return false;

123 UChar c = 0;

124 for (unsigned i = 0; i < string->length(); ++i)

125 c \|= (*string)[i] >> 8;

126 return !c;

127 }

128

129 class PerStringStats : public RefCounted<PerStringStats> {

130 public:

131 static PassRefPtr<PerStringStats> create() {

132 return adoptRef(new PerStringStats);

133 }

134

135 void add(const StringImpl* string) {

136 ++m_numberOfCopies;

137 if (!m_length) {

138 m_length = string->length();

139 fillWithSnippet(string, m_snippet);

140 }

141 if (string->isAtomic())

142 ++m_numberOfAtomicCopies;

143 if (isUnnecessarilyWide(string))

144 m_unnecessarilyWide = true;

145 }

146

147 size_t totalCharacters() const { return m_numberOfCopies * m_length; }

148

149 void print() {

150 const char* status = "ok";

151 if (m_unnecessarilyWide)

152 status = "16";

153 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status,

154 m_length, m_snippet.data());

155 }

156

157 bool m_unnecessarilyWide;

158 unsigned m_numberOfCopies;

159 unsigned m_length;

160 unsigned m_numberOfAtomicCopies;

161 Vector<char> m_snippet;

162

163 private:

164 PerStringStats()

165 : m_unnecessarilyWide(false),

166 m_numberOfCopies(0),

167 m_length(0),

168 m_numberOfAtomicCopies(0) {}

169 };

170

171 bool operator<(const RefPtr<PerStringStats>& a,

172 const RefPtr<PerStringStats>& b) {

173 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)

174 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;

175 if (a->totalCharacters() != b->totalCharacters())

176 return a->totalCharacters() < b->totalCharacters();

177 if (a->m_numberOfCopies != b->m_numberOfCopies)

178 return a->m_numberOfCopies < b->m_numberOfCopies;

179 if (a->m_length != b->m_length)

180 return a->m_length < b->m_length;

181 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;

182 }

183

184 static void printLiveStringStats(void*) {

185 MutexLocker locker(statsMutex());

186 HashSet<void*>& strings = liveStrings();

187

188 HashMap<StringImpl*, RefPtr<PerStringStats>> stats;

189 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end();

190 ++iter) {

191 StringImpl* string = static_cast<StringImpl>(iter);

192 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry =

193 stats.find(string);

194 RefPtr<PerStringStats> value =

195 entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create())

196 : entry->value;

197 value->add(string);

198 stats.set(string, value.release());

199 }

200

201 Vector<RefPtr<PerStringStats>> all;

202 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter =

203 stats.begin();

204 iter != stats.end(); ++iter)

205 all.append(iter->value);

206

207 std::sort(all.begin(), all.end());

208 std::reverse(all.begin(), all.end());

209 for (size_t i = 0; i < 20 && i < all.size(); ++i)

210 all[i]->print();

211 }

212

213 StringStats StringImpl::m_stringStats;

214

215 unsigned StringStats::s_stringRemovesTillPrintStats =

216 StringStats::s_printStringStatsFrequency;

217

218 void StringStats::removeString(StringImpl* string) {

219 unsigned length = string->length();

220 --m_totalNumberStrings;

221

222 if (string->is8Bit()) {

223 --m_number8BitStrings;

224 m_total8BitData -= length;

225 } else {

226 --m_number16BitStrings;

227 m_total16BitData -= length;

228 }

229

230 if (!--s_stringRemovesTillPrintStats) {

231 s_stringRemovesTillPrintStats = s_printStringStatsFrequency;

232 printStats();

233 }

234 }

235

236 void StringStats::printStats() {

237 dataLogF("String stats for process id %d:\n", getpid());

238

239 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;

240 double percent8Bit =

241 m_totalNumberStrings

242 ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings

243 : 0.0;

244 double average8bitLength =

245 m_number8BitStrings

246 ? (double)m_total8BitData / (double)m_number8BitStrings

247 : 0.0;

248 dataLogF(

249 "%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length "

250 "%6.1f\n",

251 m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData,

252 average8bitLength);

253

254 double percent16Bit =

255 m_totalNumberStrings

256 ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings

257 : 0.0;

258 double average16bitLength =

259 m_number16BitStrings

260 ? (double)m_total16BitData / (double)m_number16BitStrings

261 : 0.0;

262 dataLogF(

263 "%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length "

264 "%6.1f\n",

265 m_number16BitStrings, percent16Bit, m_total16BitData,

266 m_total16BitData * 2, average16bitLength);

267

268 double averageLength =

269 m_totalNumberStrings

270 ? (double)totalNumberCharacters / (double)m_totalNumberStrings

271 : 0.0;

272 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;

273 dataLogF(

274 "%8u Total %12llu chars %12llu bytes avg length "

275 "%6.1f\n",

276 m_totalNumberStrings, totalNumberCharacters, totalDataBytes,

277 averageLength);

278 unsigned long long totalSavedBytes = m_total8BitData;

279 double percentSavings = totalSavedBytes

280 ? ((double)totalSavedBytes * 100) /

281 (double)(totalDataBytes + totalSavedBytes)

282 : 0.0;

283 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes,

284 percentSavings);

285

286 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);

287 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;

288 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead,

289 overheadPercent);

290

291 internal::callOnMainThread(&printLiveStringStats, nullptr);

292 }

293 #endif

294

295 void* StringImpl::operator new(size_t size) {

296 DCHECK_EQ(size, sizeof(StringImpl));

297 return Partitions::bufferMalloc(size, "WTF::StringImpl");

298 }

299

300 void StringImpl::operator delete(void* ptr) {

301 Partitions::bufferFree(ptr);

302 }

303

304 inline StringImpl::~StringImpl() {

305 DCHECK(!isStatic());

306

307 STRING_STATS_REMOVE_STRING(this);

308

309 if (isAtomic())

310 AtomicStringTable::instance().remove(this);

311 }

312

313 void StringImpl::destroyIfNotStatic() const {

314 if (!isStatic())

315 delete this;

316 }

317

318 void StringImpl::updateContainsOnlyASCII() const {

319 m_containsOnlyASCII = is8Bit()

320 ? charactersAreAllASCII(characters8(), length())

321 : charactersAreAllASCII(characters16(), length());

322 m_needsASCIICheck = false;

323 }

324

325 bool StringImpl::isSafeToSendToAnotherThread() const {

326 if (isStatic())

327 return true;

328 // AtomicStrings are not safe to send between threads as ~StringImpl()

329 // will try to remove them from the wrong AtomicStringTable.

330 if (isAtomic())

331 return false;

332 if (hasOneRef())

333 return true;

334 return false;

335 }

336

337 #if DCHECK_IS_ON()

338 std::string StringImpl::asciiForDebugging() const {

339 CString ascii = String(isolatedCopy()->substring(0, 128)).ascii();

340 return std::string(ascii.data(), ascii.length());

341 }

342 #endif

343

344 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length,

345 LChar*& data) {

346 if (!length) {

347 data = 0;

348 return empty;

349 }

350

351 // Allocate a single buffer large enough to contain the StringImpl

352 // struct as well as the data which it contains. This removes one

353 // heap allocation from this call.

354 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(

355 allocationSize<LChar>(length), "WTF::StringImpl"));

356

357 data = reinterpret_cast<LChar*>(string + 1);

358 return adoptRef(new (string) StringImpl(length, Force8BitConstructor));

359 }

360

361 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length,

362 UChar*& data) {

363 if (!length) {

364 data = 0;

365 return empty;

366 }

367

368 // Allocate a single buffer large enough to contain the StringImpl

369 // struct as well as the data which it contains. This removes one

370 // heap allocation from this call.

371 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(

372 allocationSize<UChar>(length), "WTF::StringImpl"));

373

374 data = reinterpret_cast<UChar*>(string + 1);

375 return adoptRef(new (string) StringImpl(length));

376 }

377

378 static StaticStringsTable& staticStrings() {

379 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());

380 return staticStrings;

381 }

382

383 #if DCHECK_IS_ON()

384 static bool s_allowCreationOfStaticStrings = true;

385 #endif

386

387 const StaticStringsTable& StringImpl::allStaticStrings() {

388 return staticStrings();

389 }

390

391 void StringImpl::freezeStaticStrings() {

392 DCHECK(isMainThread());

393

394 #if DCHECK_IS_ON()

395 s_allowCreationOfStaticStrings = false;

396 #endif

397 }

398

399 unsigned StringImpl::m_highestStaticStringLength = 0;

400

401 DEFINE_GLOBAL(StringImpl, globalEmpty);

402 DEFINE_GLOBAL(StringImpl, globalEmpty16Bit);

403 // Callers need the global empty strings to be non-const.

404 StringImpl* StringImpl::empty = const_cast<StringImpl*>(&globalEmpty);

405 StringImpl* StringImpl::empty16Bit = const_cast<StringImpl*>(&globalEmpty16Bit);

406 void StringImpl::initStatics() {

407 new ((void*)empty) StringImpl(ConstructEmptyString);

408 new ((void*)empty16Bit) StringImpl(ConstructEmptyString16Bit);

409 WTF_ANNOTATE_BENIGN_RACE(StringImpl::empty,

410 "Benign race on the reference counter of a static "

411 "string created by StringImpl::empty");

412 WTF_ANNOTATE_BENIGN_RACE(StringImpl::empty16Bit,

413 "Benign race on the reference counter of a static "

414 "string created by StringImpl::empty16Bit");

415 }

416

417 StringImpl* StringImpl::createStatic(const char* string,

418 unsigned length,

419 unsigned hash) {

420 #if DCHECK_IS_ON()

421 DCHECK(s_allowCreationOfStaticStrings);

422 #endif

423 DCHECK(string);

424 DCHECK(length);

425

426 StaticStringsTable::const_iterator it = staticStrings().find(hash);

427 if (it != staticStrings().end()) {

428 DCHECK(!memcmp(string, it->value + 1, length * sizeof(LChar)));

429 return it->value;

430 }

431

432 // Allocate a single buffer large enough to contain the StringImpl

433 // struct as well as the data which it contains. This removes one

434 // heap allocation from this call.

435 RELEASE_ASSERT(length <=

436 ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) /

437 sizeof(LChar)));

438 size_t size = sizeof(StringImpl) + length * sizeof(LChar);

439

440 WTF_INTERNAL_LEAK_SANITIZER_DISABLED_SCOPE;

441 StringImpl* impl = static_cast<StringImpl*>(

442 Partitions::bufferMalloc(size, "WTF::StringImpl"));

443

444 LChar* data = reinterpret_cast<LChar*>(impl + 1);

445 impl = new (impl) StringImpl(length, hash, StaticString);

446 memcpy(data, string, length * sizeof(LChar));

447 #if DCHECK_IS_ON()

448 impl->assertHashIsCorrect();

449 #endif

450

451 DCHECK(isMainThread());

452 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);

453 staticStrings().insert(hash, impl);

454 WTF_ANNOTATE_BENIGN_RACE(impl,

455 "Benign race on the reference counter of a static "

456 "string created by StringImpl::createStatic");

457

458 return impl;

459 }

460

461 void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) {

462 #if DCHECK_IS_ON()

463 DCHECK(s_allowCreationOfStaticStrings);

464 #endif

465 staticStrings().reserveCapacityForSize(size);

466 }

467

468 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters,

469 unsigned length) {

470 if (!characters \|\| !length)

471 return empty;

472

473 UChar* data;

474 RefPtr<StringImpl> string = createUninitialized(length, data);

475 memcpy(data, characters, length * sizeof(UChar));

476 return string.release();

477 }

478

479 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters,

480 unsigned length) {

481 if (!characters \|\| !length)

482 return empty;

483

484 LChar* data;

485 RefPtr<StringImpl> string = createUninitialized(length, data);

486 memcpy(data, characters, length * sizeof(LChar));

487 return string.release();

488 }

489

490 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters,

491 unsigned length) {

492 if (!characters \|\| !length)

493 return empty;

494

495 LChar* data;

496 RefPtr<StringImpl> string = createUninitialized(length, data);

497

498 for (size_t i = 0; i < length; ++i) {

499 if (characters[i] & 0xff00)

500 return create(characters, length);

501 data[i] = static_cast<LChar>(characters[i]);

502 }

503

504 return string.release();

505 }

506

507 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) {

508 if (!string)

509 return empty;

510 size_t length = strlen(reinterpret_cast<const char*>(string));

511 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());

512 return create(string, length);

513 }

514

515 bool StringImpl::containsOnlyWhitespace() {

516 // FIXME: The definition of whitespace here includes a number of characters

517 // that are not whitespace from the point of view of LayoutText; I wonder if

518 // that's a problem in practice.

519 if (is8Bit()) {

520 for (unsigned i = 0; i < m_length; ++i) {

521 UChar c = characters8()[i];

522 if (!isASCIISpace(c))

523 return false;

524 }

525

526 return true;

527 }

528

529 for (unsigned i = 0; i < m_length; ++i) {

530 UChar c = characters16()[i];

531 if (!isASCIISpace(c))

532 return false;

533 }

534 return true;

535 }

536

537 PassRefPtr<StringImpl> StringImpl::substring(unsigned start,

538 unsigned length) const {

539 if (start >= m_length)

540 return empty;

541 unsigned maxLength = m_length - start;

542 if (length >= maxLength) {

543 // PassRefPtr has trouble dealing with const arguments. It should be updated

544 // so this const_cast is not necessary.

545 if (!start)

546 return const_cast<StringImpl*>(this);

547 length = maxLength;

548 }

549 if (is8Bit())

550 return create(characters8() + start, length);

551

552 return create(characters16() + start, length);

553 }

554

555 UChar32 StringImpl::characterStartingAt(unsigned i) {

556 if (is8Bit())

557 return characters8()[i];

558 if (U16_IS_SINGLE(characters16()[i]))

559 return characters16()[i];

560 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) &&

561 U16_IS_TRAIL(characters16()[i + 1]))

562 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);

563 return 0;

564 }

565

566 unsigned StringImpl::copyTo(UChar* buffer,

567 unsigned start,

568 unsigned maxLength) const {

569 unsigned numberOfCharactersToCopy = std::min(length() - start, maxLength);

570 if (!numberOfCharactersToCopy)

571 return 0;

572 if (is8Bit())

573 copyChars(buffer, characters8() + start, numberOfCharactersToCopy);

574 else

575 copyChars(buffer, characters16() + start, numberOfCharactersToCopy);

576 return numberOfCharactersToCopy;

577 }

578

579 PassRefPtr<StringImpl> StringImpl::lowerASCII() {

580 // First scan the string for uppercase and non-ASCII characters:

581 if (is8Bit()) {

582 unsigned firstIndexToBeLowered = m_length;

583 for (unsigned i = 0; i < m_length; ++i) {

584 LChar ch = characters8()[i];

585 if (isASCIIUpper(ch)) {

586 firstIndexToBeLowered = i;

587 break;

588 }

589 }

590

591 // Nothing to do if the string is all ASCII with no uppercase.

592 if (firstIndexToBeLowered == m_length) {

593 return this;

594 }

595

596 LChar* data8;

597 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);

598 memcpy(data8, characters8(), firstIndexToBeLowered);

599

600 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) {

601 LChar ch = characters8()[i];

602 data8[i] = isASCIIUpper(ch) ? toASCIILower(ch) : ch;

603 }

604 return newImpl.release();

605 }

606 bool noUpper = true;

607 UChar ored = 0;

608

609 const UChar* end = characters16() + m_length;

610 for (const UChar* chp = characters16(); chp != end; ++chp) {

611 if (isASCIIUpper(*chp))

612 noUpper = false;

613 ored \|= *chp;

614 }

615 // Nothing to do if the string is all ASCII with no uppercase.

616 if (noUpper && !(ored & ~0x7F))

617 return this;

618

619 RELEASE_ASSERT(m_length <=

620 static_cast<unsigned>(numeric_limits<unsigned>::max()));

621 unsigned length = m_length;

622

623 UChar* data16;

624 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);

625

626 for (unsigned i = 0; i < length; ++i) {

627 UChar c = characters16()[i];

628 data16[i] = isASCIIUpper(c) ? toASCIILower(c) : c;

629 }

630 return newImpl.release();

631 }

632

633 PassRefPtr<StringImpl> StringImpl::lower() {

634 // Note: This is a hot function in the Dromaeo benchmark, specifically the

635 // no-op code path up through the first 'return' statement.

636

637 // First scan the string for uppercase and non-ASCII characters:

638 if (is8Bit()) {

639 unsigned firstIndexToBeLowered = m_length;

640 for (unsigned i = 0; i < m_length; ++i) {

641 LChar ch = characters8()[i];

642 if (UNLIKELY(isASCIIUpper(ch) \|\| ch & ~0x7F)) {

643 firstIndexToBeLowered = i;

644 break;

645 }

646 }

647

648 // Nothing to do if the string is all ASCII with no uppercase.

649 if (firstIndexToBeLowered == m_length)

650 return this;

651

652 LChar* data8;

653 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);

654 memcpy(data8, characters8(), firstIndexToBeLowered);

655

656 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) {

657 LChar ch = characters8()[i];

658 data8[i] = UNLIKELY(ch & ~0x7F) ? static_cast<LChar>(Unicode::toLower(ch))

659 : toASCIILower(ch);

660 }

661

662 return newImpl.release();

663 }

664

665 bool noUpper = true;

666 UChar ored = 0;

667

668 const UChar* end = characters16() + m_length;

669 for (const UChar* chp = characters16(); chp != end; ++chp) {

670 if (UNLIKELY(isASCIIUpper(*chp)))

671 noUpper = false;

672 ored \|= *chp;

673 }

674 // Nothing to do if the string is all ASCII with no uppercase.

675 if (noUpper && !(ored & ~0x7F))

676 return this;

677

678 RELEASE_ASSERT(m_length <=

679 static_cast<unsigned>(numeric_limits<int32_t>::max()));

680 int32_t length = m_length;

681

682 if (!(ored & ~0x7F)) {

683 UChar* data16;

684 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);

685

686 for (int32_t i = 0; i < length; ++i) {

687 UChar c = characters16()[i];

688 data16[i] = toASCIILower(c);

689 }

690 return newImpl.release();

691 }

692

693 // Do a slower implementation for cases that include non-ASCII characters.

694 UChar* data16;

695 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);

696

697 bool error;

698 int32_t realLength =

699 Unicode::toLower(data16, length, characters16(), m_length, &error);

700 if (!error && realLength == length)

701 return newImpl.release();

702

703 newImpl = createUninitialized(realLength, data16);

704 Unicode::toLower(data16, realLength, characters16(), m_length, &error);

705 if (error)

706 return this;

707 return newImpl.release();

708 }

709

710 PassRefPtr<StringImpl> StringImpl::upper() {

711 // This function could be optimized for no-op cases the way lower() is,

712 // but in empirical testing, few actual calls to upper() are no-ops, so

713 // it wouldn't be worth the extra time for pre-scanning.

714

715 RELEASE_ASSERT(m_length <=

716 static_cast<unsigned>(numeric_limits<int32_t>::max()));

717 int32_t length = m_length;

718

719 if (is8Bit()) {

720 LChar* data8;

721 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);

722

723 // Do a faster loop for the case where all the characters are ASCII.

724 LChar ored = 0;

725 for (int i = 0; i < length; ++i) {

726 LChar c = characters8()[i];

727 ored \|= c;

728 data8[i] = toASCIIUpper(c);

729 }

730 if (!(ored & ~0x7F))

731 return newImpl.release();

732

733 // Do a slower implementation for cases that include non-ASCII Latin-1

734 // characters.

735 int numberSharpSCharacters = 0;

736

737 // There are two special cases.

738 // 1. latin-1 characters when converted to upper case are 16 bit

739 // characters.

740 // 2. Lower case sharp-S converts to "SS" (two characters)

741 for (int32_t i = 0; i < length; ++i) {

742 LChar c = characters8()[i];

743 if (UNLIKELY(c == smallLetterSharpSCharacter))

744 ++numberSharpSCharacters;

745 UChar upper = static_cast<UChar>(Unicode::toUpper(c));

746 if (UNLIKELY(upper > 0xff)) {

747 // Since this upper-cased character does not fit in an 8-bit string, we

748 // need to take the 16-bit path.

749 goto upconvert;

750 }

751 data8[i] = static_cast<LChar>(upper);

752 }

753

754 if (!numberSharpSCharacters)

755 return newImpl.release();

756

757 // We have numberSSCharacters sharp-s characters, but none of the other

758 // special characters.

759 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);

760

761 LChar* dest = data8;

762

763 for (int32_t i = 0; i < length; ++i) {

764 LChar c = characters8()[i];

765 if (c == smallLetterSharpSCharacter) {

766 *dest++ = 'S';

767 *dest++ = 'S';

768 } else {

769 *dest++ = static_cast<LChar>(Unicode::toUpper(c));

770 }

771 }

772

773 return newImpl.release();

774 }

775

776 upconvert:

777 RefPtr<StringImpl> upconverted = upconvertedString();

778 const UChar* source16 = upconverted->characters16();

779

780 UChar* data16;

781 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);

782

783 // Do a faster loop for the case where all the characters are ASCII.

784 UChar ored = 0;

785 for (int i = 0; i < length; ++i) {

786 UChar c = source16[i];

787 ored \|= c;

788 data16[i] = toASCIIUpper(c);

789 }

790 if (!(ored & ~0x7F))

791 return newImpl.release();

792

793 // Do a slower implementation for cases that include non-ASCII characters.

794 bool error;

795 int32_t realLength =

796 Unicode::toUpper(data16, length, source16, m_length, &error);

797 if (!error && realLength == length)

798 return newImpl;

799 newImpl = createUninitialized(realLength, data16);

800 Unicode::toUpper(data16, realLength, source16, m_length, &error);

801 if (error)

802 return this;

803 return newImpl.release();

804 }

805

806 PassRefPtr<StringImpl> StringImpl::upperASCII() {

807 if (is8Bit()) {

808 LChar* data8;

809 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);

810

811 for (unsigned i = 0; i < m_length; ++i) {

812 LChar c = characters8()[i];

813 data8[i] = isASCIILower(c) ? toASCIIUpper(c) : c;

814 }

815 return newImpl.release();

816 }

817

818 UChar* data16;

819 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);

820

821 for (unsigned i = 0; i < m_length; ++i) {

822 UChar c = characters16()[i];

823 data16[i] = isASCIILower(c) ? toASCIIUpper(c) : c;

824 }

825 return newImpl.release();

826 }

827

828 static inline bool localeIdMatchesLang(const AtomicString& localeId,

829 const StringView& lang) {

830 RELEASE_ASSERT(lang.length() >= 2 && lang.length() <= 3);

831 if (!localeId.impl() \|\| !localeId.impl()->startsWithIgnoringCase(lang))

832 return false;

833 if (localeId.impl()->length() == lang.length())

834 return true;

835 const UChar maybeDelimiter = (*localeId.impl())[lang.length()];

836 return maybeDelimiter == '-' \|\| maybeDelimiter == '_' \|\|

837 maybeDelimiter == '@';

838 }

839

840 typedef int32_t (icuCaseConverter)(UChar,

841 int32_t,

842 const UChar*,

843 int32_t,

844 const char*,

845 UErrorCode*);

846

847 static PassRefPtr<StringImpl> caseConvert(const UChar* source16,

848 size_t length,

849 icuCaseConverter converter,

850 const char* locale,

851 StringImpl* originalString) {

852 UChar* data16;

853 size_t targetLength = length;

854 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);

855 do {

856 UErrorCode status = U_ZERO_ERROR;

857 targetLength =

858 converter(data16, targetLength, source16, length, locale, &status);

859 if (U_SUCCESS(status)) {

860 if (length > 0)

861 return output->substring(0, targetLength);

862 return output.release();

863 }

864 if (status != U_BUFFER_OVERFLOW_ERROR)

865 return originalString;

866 // Expand the buffer.

867 output = StringImpl::createUninitialized(targetLength, data16);

868 } while (true);

869 }

870

871 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) {

872 // Use the more optimized code path most of the time.

873 // Only Turkic (tr and az) languages and Lithuanian requires

874 // locale-specific lowercasing rules. Even though CLDR has el-Lower,

875 // it's identical to the locale-agnostic lowercasing. Context-dependent

876 // handling of Greek capital sigma is built into the common lowercasing

877 // function in ICU.

878 const char* localeForConversion = 0;

879 if (localeIdMatchesLang(localeIdentifier, "tr") \|\|

880 localeIdMatchesLang(localeIdentifier, "az"))

881 localeForConversion = "tr";

882 else if (localeIdMatchesLang(localeIdentifier, "lt"))

883 localeForConversion = "lt";

884 else

885 return lower();

886

887 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))

888 CRASH();

889 int length = m_length;

890

891 RefPtr<StringImpl> upconverted = upconvertedString();

892 const UChar* source16 = upconverted->characters16();

893 return caseConvert(source16, length, u_strToLower, localeForConversion, this);

894 }

895

896 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) {

897 // Use the more-optimized code path most of the time.

898 // Only Turkic (tr and az) languages, Greek and Lithuanian require

899 // locale-specific uppercasing rules.

900 const char* localeForConversion = 0;

901 if (localeIdMatchesLang(localeIdentifier, "tr") \|\|

902 localeIdMatchesLang(localeIdentifier, "az"))

903 localeForConversion = "tr";

904 else if (localeIdMatchesLang(localeIdentifier, "el"))

905 localeForConversion = "el";

906 else if (localeIdMatchesLang(localeIdentifier, "lt"))

907 localeForConversion = "lt";

908 else

909 return upper();

910

911 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))

912 CRASH();

913 int length = m_length;

914

915 RefPtr<StringImpl> upconverted = upconvertedString();

916 const UChar* source16 = upconverted->characters16();

917

918 return caseConvert(source16, length, u_strToUpper, localeForConversion, this);

919 }

920

921 PassRefPtr<StringImpl> StringImpl::fill(UChar character) {

922 if (!(character & ~0x7F)) {

923 LChar* data;

924 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

925 for (unsigned i = 0; i < m_length; ++i)

926 data[i] = static_cast<LChar>(character);

927 return newImpl.release();

928 }

929 UChar* data;

930 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

931 for (unsigned i = 0; i < m_length; ++i)

932 data[i] = character;

933 return newImpl.release();

934 }

935

936 PassRefPtr<StringImpl> StringImpl::foldCase() {

937 RELEASE_ASSERT(m_length <=

938 static_cast<unsigned>(numeric_limits<int32_t>::max()));

939 int32_t length = m_length;

940

941 if (is8Bit()) {

942 // Do a faster loop for the case where all the characters are ASCII.

943 LChar* data;

944 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

945 LChar ored = 0;

946

947 for (int32_t i = 0; i < length; ++i) {

948 LChar c = characters8()[i];

949 data[i] = toASCIILower(c);

950 ored \|= c;

951 }

952

953 if (!(ored & ~0x7F))

954 return newImpl.release();

955

956 // Do a slower implementation for cases that include non-ASCII Latin-1

957 // characters.

958 for (int32_t i = 0; i < length; ++i)

959 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));

960

961 return newImpl.release();

962 }

963

964 // Do a faster loop for the case where all the characters are ASCII.

965 UChar* data;

966 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

967 UChar ored = 0;

968 for (int32_t i = 0; i < length; ++i) {

969 UChar c = characters16()[i];

970 ored \|= c;

971 data[i] = toASCIILower(c);

972 }

973 if (!(ored & ~0x7F))

974 return newImpl.release();

975

976 // Do a slower implementation for cases that include non-ASCII characters.

977 bool error;

978 int32_t realLength =

979 Unicode::foldCase(data, length, characters16(), m_length, &error);

980 if (!error && realLength == length)

981 return newImpl.release();

982 newImpl = createUninitialized(realLength, data);

983 Unicode::foldCase(data, realLength, characters16(), m_length, &error);

984 if (error)

985 return this;

986 return newImpl.release();

987 }

988

989 PassRefPtr<StringImpl> StringImpl::truncate(unsigned length) {

990 if (length >= m_length)

991 return this;

992 if (is8Bit())

993 return create(characters8(), length);

994 return create(characters16(), length);

995 }

996

997 template <class UCharPredicate>

998 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(

999 UCharPredicate predicate) {

1000 if (!m_length)

1001 return empty;

1002

1003 unsigned start = 0;

1004 unsigned end = m_length - 1;

1005

1006 // skip white space from start

1007 while (start <= end &&

1008 predicate(is8Bit() ? characters8()[start] : characters16()[start]))

1009 ++start;

1010

1011 // only white space

1012 if (start > end)

1013 return empty;

1014

1015 // skip white space from end

1016 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))

1017 --end;

1018

1019 if (!start && end == m_length - 1)

1020 return this;

1021 if (is8Bit())

1022 return create(characters8() + start, end + 1 - start);

1023 return create(characters16() + start, end + 1 - start);

1024 }

1025

1026 class UCharPredicate final {

1027 STACK_ALLOCATED();

1028

1029 public:

1030 inline UCharPredicate(CharacterMatchFunctionPtr function)

1031 : m_function(function) {}

1032

1033 inline bool operator()(UChar ch) const { return m_function(ch); }

1034

1035 private:

1036 const CharacterMatchFunctionPtr m_function;

1037 };

1038

1039 class SpaceOrNewlinePredicate final {

1040 STACK_ALLOCATED();

1041

1042 public:

1043 inline bool operator()(UChar ch) const { return isSpaceOrNewline(ch); }

1044 };

1045

1046 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() {

1047 return stripMatchedCharacters(SpaceOrNewlinePredicate());

1048 }

1049

1050 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(

1051 IsWhiteSpaceFunctionPtr isWhiteSpace) {

1052 return stripMatchedCharacters(UCharPredicate(isWhiteSpace));

1053 }

1054

1055 template <typename CharType>

1056 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(

1057 const CharType* characters,

1058 CharacterMatchFunctionPtr findMatch) {

1059 const CharType* from = characters;

1060 const CharType* fromend = from + m_length;

1061

1062 // Assume the common case will not remove any characters

1063 while (from != fromend && !findMatch(*from))

1064 ++from;

1065 if (from == fromend)

1066 return this;

1067

1068 StringBuffer<CharType> data(m_length);

1069 CharType* to = data.characters();

1070 unsigned outc = from - characters;

1071

1072 if (outc)

1073 memcpy(to, characters, outc * sizeof(CharType));

1074

1075 while (true) {

1076 while (from != fromend && findMatch(*from))

1077 ++from;

1078 while (from != fromend && !findMatch(*from))

1079 to[outc++] = *from++;

1080 if (from == fromend)

1081 break;

1082 }

1083

1084 data.shrink(outc);

1085

1086 return data.release();

1087 }

1088

1089 PassRefPtr<StringImpl> StringImpl::removeCharacters(

1090 CharacterMatchFunctionPtr findMatch) {

1091 if (is8Bit())

1092 return removeCharacters(characters8(), findMatch);

1093 return removeCharacters(characters16(), findMatch);

1094 }

1095

1096 PassRefPtr<StringImpl> StringImpl::remove(unsigned start,

1097 unsigned lengthToRemove) {

1098 if (lengthToRemove <= 0)

1099 return this;

1100 if (start >= m_length)

1101 return this;

1102

1103 lengthToRemove = std::min(m_length - start, lengthToRemove);

1104 unsigned removedEnd = start + lengthToRemove;

1105

1106 if (is8Bit()) {

1107 StringBuffer<LChar> buffer(m_length - lengthToRemove);

1108 copyChars(buffer.characters(), characters8(), start);

1109 copyChars(buffer.characters() + start, characters8() + removedEnd,

1110 m_length - removedEnd);

1111 return buffer.release();

1112 }

1113 StringBuffer<UChar> buffer(m_length - lengthToRemove);

1114 copyChars(buffer.characters(), characters16(), start);

1115 copyChars(buffer.characters() + start, characters16() + removedEnd,

1116 m_length - removedEnd);

1117 return buffer.release();

1118 }

1119

1120 template <typename CharType, class UCharPredicate>

1121 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(

1122 UCharPredicate predicate,

1123 StripBehavior stripBehavior) {

1124 StringBuffer<CharType> data(m_length);

1125

1126 const CharType* from = getCharacters<CharType>();

1127 const CharType* fromend = from + m_length;

1128 int outc = 0;

1129 bool changedToSpace = false;

1130

1131 CharType* to = data.characters();

1132

1133 if (stripBehavior == StripExtraWhiteSpace) {

1134 while (true) {

1135 while (from != fromend && predicate(*from)) {

1136 if (*from != ' ')

1137 changedToSpace = true;

1138 ++from;

1139 }

1140 while (from != fromend && !predicate(*from))

1141 to[outc++] = *from++;

1142 if (from != fromend)

1143 to[outc++] = ' ';

1144 else

1145 break;

1146 }

1147

1148 if (outc > 0 && to[outc - 1] == ' ')

1149 --outc;

1150 } else {

1151 for (; from != fromend; ++from) {

1152 if (predicate(*from)) {

1153 if (*from != ' ')

1154 changedToSpace = true;

1155 to[outc++] = ' ';

1156 } else {

1157 to[outc++] = *from;

1158 }

1159 }

1160 }

1161

1162 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)

1163 return this;

1164

1165 data.shrink(outc);

1166

1167 return data.release();

1168 }

1169

1170 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(

1171 StripBehavior stripBehavior) {

1172 if (is8Bit())

1173 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(

1174 SpaceOrNewlinePredicate(), stripBehavior);

1175 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(

1176 SpaceOrNewlinePredicate(), stripBehavior);

1177 }

1178

1179 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(

1180 IsWhiteSpaceFunctionPtr isWhiteSpace,

1181 StripBehavior stripBehavior) {

1182 if (is8Bit())

1183 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(

1184 UCharPredicate(isWhiteSpace), stripBehavior);

1185 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(

1186 UCharPredicate(isWhiteSpace), stripBehavior);

1187 }

1188

1189 int StringImpl::toIntStrict(bool* ok, int base) {

1190 if (is8Bit())

1191 return charactersToIntStrict(characters8(), m_length, ok, base);

1192 return charactersToIntStrict(characters16(), m_length, ok, base);

1193 }

1194

1195 unsigned StringImpl::toUIntStrict(bool* ok, int base) {

1196 if (is8Bit())

1197 return charactersToUIntStrict(characters8(), m_length, ok, base);

1198 return charactersToUIntStrict(characters16(), m_length, ok, base);

1199 }

1200

1201 int64_t StringImpl::toInt64Strict(bool* ok, int base) {

1202 if (is8Bit())

1203 return charactersToInt64Strict(characters8(), m_length, ok, base);

1204 return charactersToInt64Strict(characters16(), m_length, ok, base);

1205 }

1206

1207 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) {

1208 if (is8Bit())

1209 return charactersToUInt64Strict(characters8(), m_length, ok, base);

1210 return charactersToUInt64Strict(characters16(), m_length, ok, base);

1211 }

1212

1213 int StringImpl::toInt(bool* ok) {

1214 if (is8Bit())

1215 return charactersToInt(characters8(), m_length, ok);

1216 return charactersToInt(characters16(), m_length, ok);

1217 }

1218

1219 unsigned StringImpl::toUInt(bool* ok) {

1220 if (is8Bit())

1221 return charactersToUInt(characters8(), m_length, ok);

1222 return charactersToUInt(characters16(), m_length, ok);

1223 }

1224

1225 int64_t StringImpl::toInt64(bool* ok) {

1226 if (is8Bit())

1227 return charactersToInt64(characters8(), m_length, ok);

1228 return charactersToInt64(characters16(), m_length, ok);

1229 }

1230

1231 uint64_t StringImpl::toUInt64(bool* ok) {

1232 if (is8Bit())

1233 return charactersToUInt64(characters8(), m_length, ok);

1234 return charactersToUInt64(characters16(), m_length, ok);

1235 }

1236

1237 double StringImpl::toDouble(bool* ok) {

1238 if (is8Bit())

1239 return charactersToDouble(characters8(), m_length, ok);

1240 return charactersToDouble(characters16(), m_length, ok);

1241 }

1242

1243 float StringImpl::toFloat(bool* ok) {

1244 if (is8Bit())

1245 return charactersToFloat(characters8(), m_length, ok);

1246 return charactersToFloat(characters16(), m_length, ok);

1247 }

1248

1249 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt

1250 const UChar StringImpl::latin1CaseFoldTable[256] = {

1251 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,

1252 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011,

1253 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a,

1254 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,

1255 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c,

1256 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035,

1257 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e,

1258 0x003f, 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,

1259 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070,

1260 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079,

1261 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062,

1262 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b,

1263 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074,

1264 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d,

1265 0x007e, 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086,

1266 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,

1267 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098,

1268 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1,

1269 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa,

1270 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3,

1271 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc,

1272 0x00bd, 0x00be, 0x00bf, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5,

1273 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee,

1274 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7,

1275 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, 0x00e0,

1276 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9,

1277 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2,

1278 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb,

1279 0x00fc, 0x00fd, 0x00fe, 0x00ff,

1280 };

1281

1282 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) {

1283 DCHECK_GE(length, 0u);

1284 if (a == b)

1285 return true;

1286 while (length--) {

1287 if (StringImpl::latin1CaseFoldTable[*a++] !=

1288 StringImpl::latin1CaseFoldTable[*b++])

1289 return false;

1290 }

1291 return true;

1292 }

1293

1294 bool equalIgnoringCase(const UChar* a, const UChar* b, unsigned length) {

1295 DCHECK_GE(length, 0u);

1296 if (a == b)

1297 return true;

1298 return !Unicode::umemcasecmp(a, b, length);

1299 }

1300

1301 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) {

1302 while (length--) {

1303 if (foldCase(a++) != StringImpl::latin1CaseFoldTable[b++])

1304 return false;

1305 }

1306 return true;

1307 }

1308

1309 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction,

1310 unsigned start) {

1311 if (is8Bit())

1312 return WTF::find(characters8(), m_length, matchFunction, start);

1313 return WTF::find(characters16(), m_length, matchFunction, start);

1314 }

1315

1316 template <typename SearchCharacterType, typename MatchCharacterType>

1317 ALWAYS_INLINE static size_t findInternal(

1318 const SearchCharacterType* searchCharacters,

1319 const MatchCharacterType* matchCharacters,

1320 unsigned index,

1321 unsigned searchLength,

1322 unsigned matchLength) {

1323 // Optimization: keep a running hash of the strings,

1324 // only call equal() if the hashes match.

1325

1326 // delta is the number of additional times to test; delta == 0 means test only

1327 // once.

1328 unsigned delta = searchLength - matchLength;

1329

1330 unsigned searchHash = 0;

1331 unsigned matchHash = 0;

1332

1333 for (unsigned i = 0; i < matchLength; ++i) {

1334 searchHash += searchCharacters[i];

1335 matchHash += matchCharacters[i];

1336 }

1337

1338 unsigned i = 0;

1339 // keep looping until we match

1340 while (searchHash != matchHash \|\|

1341 !equal(searchCharacters + i, matchCharacters, matchLength)) {

1342 if (i == delta)

1343 return kNotFound;

1344 searchHash += searchCharacters[i + matchLength];

1345 searchHash -= searchCharacters[i];

1346 ++i;

1347 }

1348 return index + i;

1349 }

1350

1351 size_t StringImpl::find(const StringView& matchString, unsigned index) {

1352 if (UNLIKELY(matchString.isNull()))

1353 return kNotFound;

1354

1355 unsigned matchLength = matchString.length();

1356

1357 // Optimization 1: fast case for strings of length 1.

1358 if (matchLength == 1) {

1359 if (is8Bit())

1360 return WTF::find(characters8(), length(), matchString[0], index);

1361 return WTF::find(characters16(), length(), matchString[0], index);

1362 }

1363

1364 if (UNLIKELY(!matchLength))

1365 return min(index, length());

1366

1367 // Check index & matchLength are in range.

1368 if (index > length())

1369 return kNotFound;

1370 unsigned searchLength = length() - index;

1371 if (matchLength > searchLength)

1372 return kNotFound;

1373

1374 if (is8Bit()) {

1375 if (matchString.is8Bit())

1376 return findInternal(characters8() + index, matchString.characters8(),

1377 index, searchLength, matchLength);

1378 return findInternal(characters8() + index, matchString.characters16(),

1379 index, searchLength, matchLength);

1380 }

1381 if (matchString.is8Bit())

1382 return findInternal(characters16() + index, matchString.characters8(),

1383 index, searchLength, matchLength);

1384 return findInternal(characters16() + index, matchString.characters16(), index,

1385 searchLength, matchLength);

1386 }

1387

1388 template <typename SearchCharacterType, typename MatchCharacterType>

1389 ALWAYS_INLINE static size_t findIgnoringCaseInternal(

1390 const SearchCharacterType* searchCharacters,

1391 const MatchCharacterType* matchCharacters,

1392 unsigned index,

1393 unsigned searchLength,

1394 unsigned matchLength) {

1395 // delta is the number of additional times to test; delta == 0 means test only

1396 // once.

1397 unsigned delta = searchLength - matchLength;

1398

1399 unsigned i = 0;

1400 // keep looping until we match

1401 while (

1402 !equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {

1403 if (i == delta)

1404 return kNotFound;

1405 ++i;

1406 }

1407 return index + i;

1408 }

1409

1410 size_t StringImpl::findIgnoringCase(const StringView& matchString,

1411 unsigned index) {

1412 if (UNLIKELY(matchString.isNull()))

1413 return kNotFound;

1414

1415 unsigned matchLength = matchString.length();

1416 if (!matchLength)

1417 return min(index, length());

1418

1419 // Check index & matchLength are in range.

1420 if (index > length())

1421 return kNotFound;

1422 unsigned searchLength = length() - index;

1423 if (matchLength > searchLength)

1424 return kNotFound;

1425

1426 if (is8Bit()) {

1427 if (matchString.is8Bit())

1428 return findIgnoringCaseInternal(characters8() + index,

1429 matchString.characters8(), index,

1430 searchLength, matchLength);

1431 return findIgnoringCaseInternal(characters8() + index,

1432 matchString.characters16(), index,

1433 searchLength, matchLength);

1434 }

1435 if (matchString.is8Bit())

1436 return findIgnoringCaseInternal(characters16() + index,

1437 matchString.characters8(), index,

1438 searchLength, matchLength);

1439 return findIgnoringCaseInternal(characters16() + index,

1440 matchString.characters16(), index,

1441 searchLength, matchLength);

1442 }

1443

1444 template <typename SearchCharacterType, typename MatchCharacterType>

1445 ALWAYS_INLINE static size_t findIgnoringASCIICaseInternal(

1446 const SearchCharacterType* searchCharacters,

1447 const MatchCharacterType* matchCharacters,

1448 unsigned index,

1449 unsigned searchLength,

1450 unsigned matchLength) {

1451 // delta is the number of additional times to test; delta == 0 means test only

1452 // once.

1453 unsigned delta = searchLength - matchLength;

1454

1455 unsigned i = 0;

1456 // keep looping until we match

1457 while (!equalIgnoringASCIICase(searchCharacters + i, matchCharacters,

1458 matchLength)) {

1459 if (i == delta)

1460 return kNotFound;

1461 ++i;

1462 }

1463 return index + i;

1464 }

1465

1466 size_t StringImpl::findIgnoringASCIICase(const StringView& matchString,

1467 unsigned index) {

1468 if (UNLIKELY(matchString.isNull()))

1469 return kNotFound;

1470

1471 unsigned matchLength = matchString.length();

1472 if (!matchLength)

1473 return min(index, length());

1474

1475 // Check index & matchLength are in range.

1476 if (index > length())

1477 return kNotFound;

1478 unsigned searchLength = length() - index;

1479 if (matchLength > searchLength)

1480 return kNotFound;

1481

1482 if (is8Bit()) {

1483 if (matchString.is8Bit())

1484 return findIgnoringASCIICaseInternal(characters8() + index,

1485 matchString.characters8(), index,

1486 searchLength, matchLength);

1487 return findIgnoringASCIICaseInternal(characters8() + index,

1488 matchString.characters16(), index,

1489 searchLength, matchLength);

1490 }

1491 if (matchString.is8Bit())

1492 return findIgnoringASCIICaseInternal(characters16() + index,

1493 matchString.characters8(), index,

1494 searchLength, matchLength);

1495 return findIgnoringASCIICaseInternal(characters16() + index,

1496 matchString.characters16(), index,

1497 searchLength, matchLength);

1498 }

1499

1500 size_t StringImpl::reverseFind(UChar c, unsigned index) {

1501 if (is8Bit())

1502 return WTF::reverseFind(characters8(), m_length, c, index);

1503 return WTF::reverseFind(characters16(), m_length, c, index);

1504 }

1505

1506 template <typename SearchCharacterType, typename MatchCharacterType>

1507 ALWAYS_INLINE static size_t reverseFindInternal(

1508 const SearchCharacterType* searchCharacters,

1509 const MatchCharacterType* matchCharacters,

1510 unsigned index,

1511 unsigned length,

1512 unsigned matchLength) {

1513 // Optimization: keep a running hash of the strings,

1514 // only call equal if the hashes match.

1515

1516 // delta is the number of additional times to test; delta == 0 means test only

1517 // once.

1518 unsigned delta = min(index, length - matchLength);

1519

1520 unsigned searchHash = 0;

1521 unsigned matchHash = 0;

1522 for (unsigned i = 0; i < matchLength; ++i) {

1523 searchHash += searchCharacters[delta + i];

1524 matchHash += matchCharacters[i];

1525 }

1526

1527 // keep looping until we match

1528 while (searchHash != matchHash \|\|

1529 !equal(searchCharacters + delta, matchCharacters, matchLength)) {

1530 if (!delta)

1531 return kNotFound;

1532 --delta;

1533 searchHash -= searchCharacters[delta + matchLength];

1534 searchHash += searchCharacters[delta];

1535 }

1536 return delta;

1537 }

1538

1539 size_t StringImpl::reverseFind(const StringView& matchString, unsigned index) {

1540 if (UNLIKELY(matchString.isNull()))

1541 return kNotFound;

1542

1543 unsigned matchLength = matchString.length();

1544 unsigned ourLength = length();

1545 if (!matchLength)

1546 return min(index, ourLength);

1547

1548 // Optimization 1: fast case for strings of length 1.

1549 if (matchLength == 1) {

1550 if (is8Bit())

1551 return WTF::reverseFind(characters8(), ourLength, matchString[0], index);

1552 return WTF::reverseFind(characters16(), ourLength, matchString[0], index);

1553 }

1554

1555 // Check index & matchLength are in range.

1556 if (matchLength > ourLength)

1557 return kNotFound;

1558

1559 if (is8Bit()) {

1560 if (matchString.is8Bit())

1561 return reverseFindInternal(characters8(), matchString.characters8(),

1562 index, ourLength, matchLength);

1563 return reverseFindInternal(characters8(), matchString.characters16(), index,

1564 ourLength, matchLength);

1565 }

1566 if (matchString.is8Bit())

1567 return reverseFindInternal(characters16(), matchString.characters8(), index,

1568 ourLength, matchLength);

1569 return reverseFindInternal(characters16(), matchString.characters16(), index,

1570 ourLength, matchLength);

1571 }

1572

1573 bool StringImpl::startsWith(UChar character) const {

1574 return m_length && (*this)[0] == character;

1575 }

1576

1577 bool StringImpl::startsWith(const StringView& prefix) const {

1578 if (prefix.length() > length())

1579 return false;

1580 if (is8Bit()) {

1581 if (prefix.is8Bit())

1582 return equal(characters8(), prefix.characters8(), prefix.length());

1583 return equal(characters8(), prefix.characters16(), prefix.length());

1584 }

1585 if (prefix.is8Bit())

1586 return equal(characters16(), prefix.characters8(), prefix.length());

1587 return equal(characters16(), prefix.characters16(), prefix.length());

1588 }

1589

1590 bool StringImpl::startsWithIgnoringCase(const StringView& prefix) const {

1591 if (prefix.length() > length())

1592 return false;

1593 if (is8Bit()) {

1594 if (prefix.is8Bit())

1595 return equalIgnoringCase(characters8(), prefix.characters8(),

1596 prefix.length());

1597 return equalIgnoringCase(characters8(), prefix.characters16(),

1598 prefix.length());

1599 }

1600 if (prefix.is8Bit())

1601 return equalIgnoringCase(characters16(), prefix.characters8(),

1602 prefix.length());

1603 return equalIgnoringCase(characters16(), prefix.characters16(),

1604 prefix.length());

1605 }

1606

1607 bool StringImpl::startsWithIgnoringASCIICase(const StringView& prefix) const {

1608 if (prefix.length() > length())

1609 return false;

1610 if (is8Bit()) {

1611 if (prefix.is8Bit())

1612 return equalIgnoringASCIICase(characters8(), prefix.characters8(),

1613 prefix.length());

1614 return equalIgnoringASCIICase(characters8(), prefix.characters16(),

1615 prefix.length());

1616 }

1617 if (prefix.is8Bit())

1618 return equalIgnoringASCIICase(characters16(), prefix.characters8(),

1619 prefix.length());

1620 return equalIgnoringASCIICase(characters16(), prefix.characters16(),

1621 prefix.length());

1622 }

1623

1624 bool StringImpl::endsWith(UChar character) const {

1625 return m_length && (*this)[m_length - 1] == character;

1626 }

1627

1628 bool StringImpl::endsWith(const StringView& suffix) const {

1629 if (suffix.length() > length())

1630 return false;

1631 unsigned startOffset = length() - suffix.length();

1632 if (is8Bit()) {

1633 if (suffix.is8Bit())

1634 return equal(characters8() + startOffset, suffix.characters8(),

1635 suffix.length());

1636 return equal(characters8() + startOffset, suffix.characters16(),

1637 suffix.length());

1638 }

1639 if (suffix.is8Bit())

1640 return equal(characters16() + startOffset, suffix.characters8(),

1641 suffix.length());

1642 return equal(characters16() + startOffset, suffix.characters16(),

1643 suffix.length());

1644 }

1645

1646 bool StringImpl::endsWithIgnoringCase(const StringView& suffix) const {

1647 if (suffix.length() > length())

1648 return false;

1649 unsigned startOffset = length() - suffix.length();

1650 if (is8Bit()) {

1651 if (suffix.is8Bit())

1652 return equalIgnoringCase(characters8() + startOffset,

1653 suffix.characters8(), suffix.length());

1654 return equalIgnoringCase(characters8() + startOffset, suffix.characters16(),

1655 suffix.length());

1656 }

1657 if (suffix.is8Bit())

1658 return equalIgnoringCase(characters16() + startOffset, suffix.characters8(),

1659 suffix.length());

1660 return equalIgnoringCase(characters16() + startOffset, suffix.characters16(),

1661 suffix.length());

1662 }

1663

1664 bool StringImpl::endsWithIgnoringASCIICase(const StringView& suffix) const {

1665 if (suffix.length() > length())

1666 return false;

1667 unsigned startOffset = length() - suffix.length();

1668 if (is8Bit()) {

1669 if (suffix.is8Bit())

1670 return equalIgnoringASCIICase(characters8() + startOffset,

1671 suffix.characters8(), suffix.length());

1672 return equalIgnoringASCIICase(characters8() + startOffset,

1673 suffix.characters16(), suffix.length());

1674 }

1675 if (suffix.is8Bit())

1676 return equalIgnoringASCIICase(characters16() + startOffset,

1677 suffix.characters8(), suffix.length());

1678 return equalIgnoringASCIICase(characters16() + startOffset,

1679 suffix.characters16(), suffix.length());

1680 }

1681

1682 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) {

1683 if (oldC == newC)

1684 return this;

1685

1686 if (find(oldC) == kNotFound)

1687 return this;

1688

1689 unsigned i;

1690 if (is8Bit()) {

1691 if (newC <= 0xff) {

1692 LChar* data;

1693 LChar oldChar = static_cast<LChar>(oldC);

1694 LChar newChar = static_cast<LChar>(newC);

1695

1696 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

1697

1698 for (i = 0; i != m_length; ++i) {

1699 LChar ch = characters8()[i];

1700 if (ch == oldChar)

1701 ch = newChar;

1702 data[i] = ch;

1703 }

1704 return newImpl.release();

1705 }

1706

1707 // There is the possibility we need to up convert from 8 to 16 bit,

1708 // create a 16 bit string for the result.

1709 UChar* data;

1710 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

1711

1712 for (i = 0; i != m_length; ++i) {

1713 UChar ch = characters8()[i];

1714 if (ch == oldC)

1715 ch = newC;

1716 data[i] = ch;

1717 }

1718

1719 return newImpl.release();

1720 }

1721

1722 UChar* data;

1723 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

1724

1725 for (i = 0; i != m_length; ++i) {

1726 UChar ch = characters16()[i];

1727 if (ch == oldC)

1728 ch = newC;

1729 data[i] = ch;

1730 }

1731 return newImpl.release();

1732 }

1733

1734 // TODO(esprehn): Passing a null replacement is the same as empty string for

1735 // this method but all others treat null as a no-op. We should choose one

1736 // behavior.

1737 PassRefPtr<StringImpl> StringImpl::replace(unsigned position,

1738 unsigned lengthToReplace,

1739 const StringView& string) {

1740 position = min(position, length());

1741 lengthToReplace = min(lengthToReplace, length() - position);

1742 unsigned lengthToInsert = string.length();

1743 if (!lengthToReplace && !lengthToInsert)

1744 return this;

1745

1746 RELEASE_ASSERT((length() - lengthToReplace) <

1747 (numeric_limits<unsigned>::max() - lengthToInsert));

1748

1749 if (is8Bit() && (string.isNull() \|\| string.is8Bit())) {

1750 LChar* data;

1751 RefPtr<StringImpl> newImpl =

1752 createUninitialized(length() - lengthToReplace + lengthToInsert, data);

1753 memcpy(data, characters8(), position * sizeof(LChar));

1754 if (!string.isNull())

1755 memcpy(data + position, string.characters8(),

1756 lengthToInsert * sizeof(LChar));

1757 memcpy(data + position + lengthToInsert,

1758 characters8() + position + lengthToReplace,

1759 (length() - position - lengthToReplace) * sizeof(LChar));

1760 return newImpl.release();

1761 }

1762 UChar* data;

1763 RefPtr<StringImpl> newImpl =

1764 createUninitialized(length() - lengthToReplace + lengthToInsert, data);

1765 if (is8Bit())

1766 for (unsigned i = 0; i < position; ++i)

1767 data[i] = characters8()[i];

1768 else

1769 memcpy(data, characters16(), position * sizeof(UChar));

1770 if (!string.isNull()) {

1771 if (string.is8Bit())

1772 for (unsigned i = 0; i < lengthToInsert; ++i)

1773 data[i + position] = string.characters8()[i];

1774 else

1775 memcpy(data + position, string.characters16(),

1776 lengthToInsert * sizeof(UChar));

1777 }

1778 if (is8Bit()) {

1779 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)

1780 data[i + position + lengthToInsert] =

1781 characters8()[i + position + lengthToReplace];

1782 } else {

1783 memcpy(data + position + lengthToInsert,

1784 characters16() + position + lengthToReplace,

1785 (length() - position - lengthToReplace) * sizeof(UChar));

1786 }

1787 return newImpl.release();

1788 }

1789

1790 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern,

1791 const StringView& replacement) {

1792 if (replacement.isNull())

1793 return this;

1794 if (replacement.is8Bit())

1795 return replace(pattern, replacement.characters8(), replacement.length());

1796 return replace(pattern, replacement.characters16(), replacement.length());

1797 }

1798

1799 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern,

1800 const LChar* replacement,

1801 unsigned repStrLength) {

1802 DCHECK(replacement);

1803

1804 size_t srcSegmentStart = 0;

1805 unsigned matchCount = 0;

1806

1807 // Count the matches.

1808 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {

1809 ++matchCount;

1810 ++srcSegmentStart;

1811 }

1812

1813 // If we have 0 matches then we don't have to do any more work.

1814 if (!matchCount)

1815 return this;

1816

1817 RELEASE_ASSERT(!repStrLength \|\|

1818 matchCount <= numeric_limits<unsigned>::max() / repStrLength);

1819

1820 unsigned replaceSize = matchCount * repStrLength;

1821 unsigned newSize = m_length - matchCount;

1822 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));

1823

1824 newSize += replaceSize;

1825

1826 // Construct the new data.

1827 size_t srcSegmentEnd;

1828 unsigned srcSegmentLength;

1829 srcSegmentStart = 0;

1830 unsigned dstOffset = 0;

1831

1832 if (is8Bit()) {

1833 LChar* data;

1834 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);

1835

1836 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {

1837 srcSegmentLength = srcSegmentEnd - srcSegmentStart;

1838 memcpy(data + dstOffset, characters8() + srcSegmentStart,

1839 srcSegmentLength * sizeof(LChar));

1840 dstOffset += srcSegmentLength;

1841 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));

1842 dstOffset += repStrLength;

1843 srcSegmentStart = srcSegmentEnd + 1;

1844 }

1845

1846 srcSegmentLength = m_length - srcSegmentStart;

1847 memcpy(data + dstOffset, characters8() + srcSegmentStart,

1848 srcSegmentLength * sizeof(LChar));

1849

1850 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());

1851

1852 return newImpl.release();

1853 }

1854

1855 UChar* data;

1856 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);

1857

1858 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {

1859 srcSegmentLength = srcSegmentEnd - srcSegmentStart;

1860 memcpy(data + dstOffset, characters16() + srcSegmentStart,

1861 srcSegmentLength * sizeof(UChar));

1862

1863 dstOffset += srcSegmentLength;

1864 for (unsigned i = 0; i < repStrLength; ++i)

1865 data[i + dstOffset] = replacement[i];

1866

1867 dstOffset += repStrLength;

1868 srcSegmentStart = srcSegmentEnd + 1;

1869 }

1870

1871 srcSegmentLength = m_length - srcSegmentStart;

1872 memcpy(data + dstOffset, characters16() + srcSegmentStart,

1873 srcSegmentLength * sizeof(UChar));

1874

1875 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());

1876

1877 return newImpl.release();

1878 }

1879

1880 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern,

1881 const UChar* replacement,

1882 unsigned repStrLength) {

1883 DCHECK(replacement);

1884

1885 size_t srcSegmentStart = 0;

1886 unsigned matchCount = 0;

1887

1888 // Count the matches.

1889 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {

1890 ++matchCount;

1891 ++srcSegmentStart;

1892 }

1893

1894 // If we have 0 matches then we don't have to do any more work.

1895 if (!matchCount)

1896 return this;

1897

1898 RELEASE_ASSERT(!repStrLength \|\|

1899 matchCount <= numeric_limits<unsigned>::max() / repStrLength);

1900

1901 unsigned replaceSize = matchCount * repStrLength;

1902 unsigned newSize = m_length - matchCount;

1903 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));

1904

1905 newSize += replaceSize;

1906

1907 // Construct the new data.

1908 size_t srcSegmentEnd;

1909 unsigned srcSegmentLength;

1910 srcSegmentStart = 0;

1911 unsigned dstOffset = 0;

1912

1913 if (is8Bit()) {

1914 UChar* data;

1915 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);

1916

1917 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {

1918 srcSegmentLength = srcSegmentEnd - srcSegmentStart;

1919 for (unsigned i = 0; i < srcSegmentLength; ++i)

1920 data[i + dstOffset] = characters8()[i + srcSegmentStart];

1921

1922 dstOffset += srcSegmentLength;

1923 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));

1924

1925 dstOffset += repStrLength;

1926 srcSegmentStart = srcSegmentEnd + 1;

1927 }

1928

1929 srcSegmentLength = m_length - srcSegmentStart;

1930 for (unsigned i = 0; i < srcSegmentLength; ++i)

1931 data[i + dstOffset] = characters8()[i + srcSegmentStart];

1932

1933 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());

1934

1935 return newImpl.release();

1936 }

1937

1938 UChar* data;

1939 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);

1940

1941 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {

1942 srcSegmentLength = srcSegmentEnd - srcSegmentStart;

1943 memcpy(data + dstOffset, characters16() + srcSegmentStart,

1944 srcSegmentLength * sizeof(UChar));

1945

1946 dstOffset += srcSegmentLength;

1947 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));

1948

1949 dstOffset += repStrLength;

1950 srcSegmentStart = srcSegmentEnd + 1;

1951 }

1952

1953 srcSegmentLength = m_length - srcSegmentStart;

1954 memcpy(data + dstOffset, characters16() + srcSegmentStart,

1955 srcSegmentLength * sizeof(UChar));

1956

1957 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());

1958

1959 return newImpl.release();

1960 }

1961

1962 PassRefPtr<StringImpl> StringImpl::replace(const StringView& pattern,

1963 const StringView& replacement) {

1964 if (pattern.isNull() \|\| replacement.isNull())

1965 return this;

1966

1967 unsigned patternLength = pattern.length();

1968 if (!patternLength)

1969 return this;

1970

1971 unsigned repStrLength = replacement.length();

1972 size_t srcSegmentStart = 0;

1973 unsigned matchCount = 0;

1974

1975 // Count the matches.

1976 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {

1977 ++matchCount;

1978 srcSegmentStart += patternLength;

1979 }

1980

1981 // If we have 0 matches, we don't have to do any more work

1982 if (!matchCount)

1983 return this;

1984

1985 unsigned newSize = m_length - matchCount * patternLength;

1986 RELEASE_ASSERT(!repStrLength \|\|

1987 matchCount <= numeric_limits<unsigned>::max() / repStrLength);

1988

1989 RELEASE_ASSERT(newSize <=

1990 (numeric_limits<unsigned>::max() - matchCount * repStrLength));

1991

1992 newSize += matchCount * repStrLength;

1993

1994 // Construct the new data

1995 size_t srcSegmentEnd;

1996 unsigned srcSegmentLength;

1997 srcSegmentStart = 0;

1998 unsigned dstOffset = 0;

1999 bool srcIs8Bit = is8Bit();

2000 bool replacementIs8Bit = replacement.is8Bit();

2001

2002 // There are 4 cases:

2003 // 1. This and replacement are both 8 bit.

2004 // 2. This and replacement are both 16 bit.

2005 // 3. This is 8 bit and replacement is 16 bit.

2006 // 4. This is 16 bit and replacement is 8 bit.

2007 if (srcIs8Bit && replacementIs8Bit) {

2008 // Case 1

2009 LChar* data;

2010 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);

2011 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {

2012 srcSegmentLength = srcSegmentEnd - srcSegmentStart;

2013 memcpy(data + dstOffset, characters8() + srcSegmentStart,

2014 srcSegmentLength * sizeof(LChar));

2015 dstOffset += srcSegmentLength;

2016 memcpy(data + dstOffset, replacement.characters8(),

2017 repStrLength * sizeof(LChar));

2018 dstOffset += repStrLength;

2019 srcSegmentStart = srcSegmentEnd + patternLength;

2020 }

2021

2022 srcSegmentLength = m_length - srcSegmentStart;

2023 memcpy(data + dstOffset, characters8() + srcSegmentStart,

2024 srcSegmentLength * sizeof(LChar));

2025

2026 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());

2027

2028 return newImpl.release();

2029 }

2030

2031 UChar* data;

2032 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);

2033 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {

2034 srcSegmentLength = srcSegmentEnd - srcSegmentStart;

2035 if (srcIs8Bit) {

2036 // Case 3.

2037 for (unsigned i = 0; i < srcSegmentLength; ++i)

2038 data[i + dstOffset] = characters8()[i + srcSegmentStart];

2039 } else {

2040 // Case 2 & 4.

2041 memcpy(data + dstOffset, characters16() + srcSegmentStart,

2042 srcSegmentLength * sizeof(UChar));

2043 }

2044 dstOffset += srcSegmentLength;

2045 if (replacementIs8Bit) {

2046 // Cases 2 & 3.

2047 for (unsigned i = 0; i < repStrLength; ++i)

2048 data[i + dstOffset] = replacement.characters8()[i];

2049 } else {

2050 // Case 4

2051 memcpy(data + dstOffset, replacement.characters16(),

2052 repStrLength * sizeof(UChar));

2053 }

2054 dstOffset += repStrLength;

2055 srcSegmentStart = srcSegmentEnd + patternLength;

2056 }

2057

2058 srcSegmentLength = m_length - srcSegmentStart;

2059 if (srcIs8Bit) {

2060 // Case 3.

2061 for (unsigned i = 0; i < srcSegmentLength; ++i)

2062 data[i + dstOffset] = characters8()[i + srcSegmentStart];

2063 } else {

2064 // Cases 2 & 4.

2065 memcpy(data + dstOffset, characters16() + srcSegmentStart,

2066 srcSegmentLength * sizeof(UChar));

2067 }

2068

2069 DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());

2070

2071 return newImpl.release();

2072 }

2073

2074 PassRefPtr<StringImpl> StringImpl::upconvertedString() {

2075 if (is8Bit())

2076 return String::make16BitFrom8BitSource(characters8(), m_length)

2077 .releaseImpl();

2078 return this;

2079 }

2080

2081 static inline bool stringImplContentEqual(const StringImpl* a,

2082 const StringImpl* b) {

2083 unsigned aLength = a->length();

2084 unsigned bLength = b->length();

2085 if (aLength != bLength)

2086 return false;

2087

2088 if (a->is8Bit()) {

2089 if (b->is8Bit())

2090 return equal(a->characters8(), b->characters8(), aLength);

2091

2092 return equal(a->characters8(), b->characters16(), aLength);

2093 }

2094

2095 if (b->is8Bit())

2096 return equal(a->characters16(), b->characters8(), aLength);

2097

2098 return equal(a->characters16(), b->characters16(), aLength);

2099 }

2100

2101 bool equal(const StringImpl* a, const StringImpl* b) {

2102 if (a == b)

2103 return true;

2104 if (!a \|\| !b)

2105 return false;

2106 if (a->isAtomic() && b->isAtomic())

2107 return false;

2108

2109 return stringImplContentEqual(a, b);

2110 }

2111

2112 template <typename CharType>

2113 inline bool equalInternal(const StringImpl* a,

2114 const CharType* b,

2115 unsigned length) {

2116 if (!a)

2117 return !b;

2118 if (!b)

2119 return false;

2120

2121 if (a->length() != length)

2122 return false;

2123 if (a->is8Bit())

2124 return equal(a->characters8(), b, length);

2125 return equal(a->characters16(), b, length);

2126 }

2127

2128 bool equal(const StringImpl* a, const LChar* b, unsigned length) {

2129 return equalInternal(a, b, length);

2130 }

2131

2132 bool equal(const StringImpl* a, const UChar* b, unsigned length) {

2133 return equalInternal(a, b, length);

2134 }

2135

2136 bool equal(const StringImpl* a, const LChar* b) {

2137 if (!a)

2138 return !b;

2139 if (!b)

2140 return !a;

2141

2142 unsigned length = a->length();

2143

2144 if (a->is8Bit()) {

2145 const LChar* aPtr = a->characters8();

2146 for (unsigned i = 0; i != length; ++i) {

2147 LChar bc = b[i];

2148 LChar ac = aPtr[i];

2149 if (!bc)

2150 return false;

2151 if (ac != bc)

2152 return false;

2153 }

2154

2155 return !b[length];

2156 }

2157

2158 const UChar* aPtr = a->characters16();

2159 for (unsigned i = 0; i != length; ++i) {

2160 LChar bc = b[i];

2161 if (!bc)

2162 return false;

2163 if (aPtr[i] != bc)

2164 return false;

2165 }

2166

2167 return !b[length];

2168 }

2169

2170 bool equalNonNull(const StringImpl* a, const StringImpl* b) {

2171 DCHECK(a);

2172 DCHECK(b);

2173 if (a == b)

2174 return true;

2175

2176 return stringImplContentEqual(a, b);

2177 }

2178

2179 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) {

2180 if (!a && b && !b->length())

2181 return true;

2182 if (!b && a && !a->length())

2183 return true;

2184 return equal(a, b);

2185 }

2186

// Three-way comparison of two character sequences that treats ASCII letters
// case-insensitively.
//
// |c1| / |c2| point at sequences of |l1| / |l2| characters. The sequences are
// walked in lockstep while their ASCII-lowercased characters agree. Returns
//   -1 / 1 if the first differing lowercased character of |c1| is
//          smaller / greater than that of |c2|,
//   otherwise -1 / 0 / 1 according to how |l1| compares with |l2| (a proper
//          prefix sorts first).
template <typename CharacterType1, typename CharacterType2>
int codePointCompareIgnoringASCIICase(unsigned l1,
                                      unsigned l2,
                                      const CharacterType1* c1,
                                      const CharacterType2* c2) {
  const unsigned lmin = l1 < l2 ? l1 : l2;
  unsigned pos = 0;
  // Compare the characters the pointers refer to, not the pointers
  // themselves: both must be dereferenced before lowercasing.
  while (pos < lmin && toASCIILower(*c1) == toASCIILower(*c2)) {
    ++c1;
    ++c2;
    ++pos;
  }

  // Stopped inside the common range: the current characters differ.
  if (pos < lmin)
    return (toASCIILower(c1[0]) > toASCIILower(c2[0])) ? 1 : -1;

  // The shorter sequence is a prefix of the longer one (or both are equal).
  if (l1 == l2)
    return 0;

  return (l1 > l2) ? 1 : -1;
}

2208

2209 int codePointCompareIgnoringASCIICase(const StringImpl* string1,

2210 const LChar* string2) {

2211 unsigned length1 = string1 ? string1->length() : 0;

2212 size_t length2 = string2 ? strlen(reinterpret_cast<const char*>(string2)) : 0;

2213

2214 if (!string1)

2215 return length2 > 0 ? -1 : 0;

2216

2217 if (!string2)

2218 return length1 > 0 ? 1 : 0;

2219

2220 if (string1->is8Bit())

2221 return codePointCompareIgnoringASCIICase(length1, length2,

2222 string1->characters8(), string2);

2223 return codePointCompareIgnoringASCIICase(length1, length2,

2224 string1->characters16(), string2);

2225 }

2226

2227 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) {

2228 if (!localeIdentifier.isNull()) {

2229 if (localeIdMatchesLang(localeIdentifier, "tr") \|\|

2230 localeIdMatchesLang(localeIdentifier, "az")) {

2231 if (c == 'i')

2232 return latinCapitalLetterIWithDotAbove;

2233 if (c == latinSmallLetterDotlessI)

2234 return 'I';

2235 } else if (localeIdMatchesLang(localeIdentifier, "lt")) {

2236 // TODO(rob.buis) implement upper-casing rules for lt

2237 // like in StringImpl::upper(locale).

2238 }

2239 }

2240

2241 return toUpper(c);

2242 }

2243

2244 } // namespace WTF

OLD	NEW

« no previous file with comments | « third_party/WebKit/Source/wtf/text/StringImpl.h ('k') | third_party/WebKit/Source/wtf/text/StringImplCF.cpp » ('j') | no next file with comments »