Source/WTF/wtf/text/StringImpl.cpp - Issue 14238015: Move Source/WTF/wtf to Source/wtf

Side by Side Diff: Source/WTF/wtf/text/StringImpl.cpp

Issue 14238015: Move Source/WTF/wtf to Source/wtf (Closed) Base URL: svn://svn.chromium.org/blink/trunk

Patch Set: Created 7 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 /*

2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)

3 * (C) 1999 Antti Koivisto (koivisto@kde.org)

4 * (C) 2001 Dirk Mueller ( mueller@kde.org )

5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.

6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)

7 *

8 * This library is free software; you can redistribute it and/or

9 * modify it under the terms of the GNU Library General Public

10 * License as published by the Free Software Foundation; either

11 * version 2 of the License, or (at your option) any later version.

12 *

13 * This library is distributed in the hope that it will be useful,

14 * but WITHOUT ANY WARRANTY; without even the implied warranty of

15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

16 * Library General Public License for more details.

17 *

18 * You should have received a copy of the GNU Library General Public License

19 * along with this library; see the file COPYING.LIB. If not, write to

20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,

21 * Boston, MA 02110-1301, USA.

22 *

23 */

24

25 #include "config.h"

26 #include "StringImpl.h"

27

28 #include "AtomicString.h"

29 #include "StringBuffer.h"

30 #include "StringHash.h"

31 #include <wtf/ProcessID.h>

32 #include <wtf/StdLibExtras.h>

33 #include <wtf/WTFThreadData.h>

34 #include <wtf/unicode/CharacterNames.h>

35

36 #ifdef STRING_STATS

37 #include <unistd.h>

38 #include <wtf/DataLog.h>

39 #endif

40

41 using namespace std;

42

43 namespace WTF {

44

45 using namespace Unicode;

46

47 COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), String Impl_should_stay_small);

48

49 #ifdef STRING_STATS

50 StringStats StringImpl::m_stringStats;

51

52 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printString StatsFrequency;

53

54 void StringStats::removeString(StringImpl* string)

55 {

56 unsigned length = string->length();

57 bool isSubString = string->isSubString();

58

59 --m_totalNumberStrings;

60

61 if (string->has16BitShadow()) {

62 --m_numberUpconvertedStrings;

63 if (!isSubString)

64 m_totalUpconvertedData -= length;

65 }

66

67 if (string->is8Bit()) {

68 --m_number8BitStrings;

69 if (!isSubString)

70 m_total8BitData -= length;

71 } else {

72 --m_number16BitStrings;

73 if (!isSubString)

74 m_total16BitData -= length;

75 }

76

77 if (!--s_stringRemovesTillPrintStats) {

78 s_stringRemovesTillPrintStats = s_printStringStatsFrequency;

79 printStats();

80 }

81 }

82

83 void StringStats::printStats()

84 {

85 dataLogF("String stats for process id %d:\n", getCurrentProcessID());

86

87 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitDat a;

88 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 1 00) / (double)m_totalNumberStrings : 0.0;

89 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / ( double)m_number8BitStrings : 0.0;

90 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, av erage8bitLength);

91

92 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;

93 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;

94 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);

95

96 double percentUpconverted = m_totalNumberStrings ? ((double)m_numberUpconver tedStrings * 100) / (double)m_number8BitStrings : 0.0;

97 double averageUpconvertedLength = m_numberUpconvertedStrings ? (double)m_tot alUpconvertedData / (double)m_numberUpconvertedStrings : 0.0;

98 dataLogF("%8u (%5.2f%%) upconverted %12llu chars %12llu bytes avg length %6.1f\n", m_numberUpconvertedStrings, percentUpconverted, m_totalUpconvertedData , m_totalUpconvertedData * 2, averageUpconvertedLength);

99

100 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;

101 unsigned long long totalDataBytes = m_total8BitData + (m_total16BitData + m_ totalUpconvertedData) * 2;

102 dataLogF("%8u Total %12llu chars %12llu bytes avg length % 6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLen gth);

103 unsigned long long totalSavedBytes = m_total8BitData - m_totalUpconvertedDat a;

104 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;

105 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);

106 }

107 #endif

108

109

110 StringImpl::~StringImpl()

111 {

112 ASSERT(!isStatic());

113

114 STRING_STATS_REMOVE_STRING(this);

115

116 if (isAtomic())

117 AtomicString::remove(this);

118

119 BufferOwnership ownership = bufferOwnership();

120

121 if (has16BitShadow()) {

122 ASSERT(m_copyData16);

123 fastFree(m_copyData16);

124 }

125

126 if (ownership == BufferInternal)

127 return;

128 if (ownership == BufferOwned) {

129 // We use m_data8, but since it is a union with m_data16 this works eith er way.

130 ASSERT(m_data8);

131 fastFree(const_cast<LChar*>(m_data8));

132 return;

133 }

134 ASSERT(ownership == BufferSubstring);

135 ASSERT(m_substringBuffer);

136 m_substringBuffer->deref();

137 }

138

139 PassRefPtr<StringImpl> StringImpl::createFromLiteral(const char* characters, uns igned length)

140 {

141 ASSERT_WITH_MESSAGE(length, "Use StringImpl::empty() to create an empty stri ng");

142 ASSERT(charactersAreAllASCII<LChar>(reinterpret_cast<const LChar*>(character s), length));

143 return adoptRef(new StringImpl(characters, length, ConstructFromLiteral));

144 }

145

146 PassRefPtr<StringImpl> StringImpl::createFromLiteral(const char* characters)

147 {

148 size_t length = strlen(characters);

149 ASSERT_WITH_MESSAGE(length, "Use StringImpl::empty() to create an empty stri ng");

150 ASSERT(charactersAreAllASCII<LChar>(reinterpret_cast<const LChar*>(character s), length));

151 return adoptRef(new StringImpl(characters, length, ConstructFromLiteral));

152 }

153

154 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)

155 {

156 if (!length) {

157 data = 0;

158 return empty();

159 }

160

161 // Allocate a single buffer large enough to contain the StringImpl

162 // struct as well as the data which it contains. This removes one

163 // heap allocation from this call.

164 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str ingImpl)) / sizeof(LChar)));

165 size_t size = sizeof(StringImpl) + length * sizeof(LChar);

166 StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));

167

168 data = reinterpret_cast<LChar*>(string + 1);

169 return adoptRef(new (NotNull, string) StringImpl(length, Force8BitConstructo r));

170 }

171

172 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)

173 {

174 if (!length) {

175 data = 0;

176 return empty();

177 }

178

179 // Allocate a single buffer large enough to contain the StringImpl

180 // struct as well as the data which it contains. This removes one

181 // heap allocation from this call.

182 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str ingImpl)) / sizeof(UChar)));

183 size_t size = sizeof(StringImpl) + length * sizeof(UChar);

184 StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));

185

186 data = reinterpret_cast<UChar*>(string + 1);

187 return adoptRef(new (NotNull, string) StringImpl(length));

188 }

189

190 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalStr ing, unsigned length, LChar*& data)

191 {

192 ASSERT(originalString->is8Bit());

193 ASSERT(originalString->hasOneRef());

194 ASSERT(originalString->bufferOwnership() == BufferInternal);

195

196 if (!length) {

197 data = 0;

198 return empty();

199 }

200

201 // Same as createUninitialized() except here we use fastRealloc.

202 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str ingImpl)) / sizeof(LChar)));

203 size_t size = sizeof(StringImpl) + length * sizeof(LChar);

204 originalString->~StringImpl();

205 StringImpl* string = static_cast<StringImpl*>(fastRealloc(originalString.lea kRef(), size));

206

207 data = reinterpret_cast<LChar*>(string + 1);

208 return adoptRef(new (NotNull, string) StringImpl(length, Force8BitConstructo r));

209 }

210

211 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalStr ing, unsigned length, UChar*& data)

212 {

213 ASSERT(!originalString->is8Bit());

214 ASSERT(originalString->hasOneRef());

215 ASSERT(originalString->bufferOwnership() == BufferInternal);

216

217 if (!length) {

218 data = 0;

219 return empty();

220 }

221

222 // Same as createUninitialized() except here we use fastRealloc.

223 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str ingImpl)) / sizeof(UChar)));

224 size_t size = sizeof(StringImpl) + length * sizeof(UChar);

225 originalString->~StringImpl();

226 StringImpl* string = static_cast<StringImpl*>(fastRealloc(originalString.lea kRef(), size));

227

228 data = reinterpret_cast<UChar*>(string + 1);

229 return adoptRef(new (NotNull, string) StringImpl(length));

230 }

231

232 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned leng th)

233 {

234 if (!characters \|\| !length)

235 return empty();

236

237 UChar* data;

238 RefPtr<StringImpl> string = createUninitialized(length, data);

239 memcpy(data, characters, length * sizeof(UChar));

240 return string.release();

241 }

242

243 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned leng th)

244 {

245 if (!characters \|\| !length)

246 return empty();

247

248 LChar* data;

249 RefPtr<StringImpl> string = createUninitialized(length, data);

250 memcpy(data, characters, length * sizeof(LChar));

251 return string.release();

252 }

253

254 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)

255 {

256 if (!characters \|\| !length)

257 return empty();

258

259 LChar* data;

260 RefPtr<StringImpl> string = createUninitialized(length, data);

261

262 for (size_t i = 0; i < length; ++i) {

263 if (characters[i] & 0xff00)

264 return create(characters, length);

265 data[i] = static_cast<LChar>(characters[i]);

266 }

267

268 return string.release();

269 }

270

271 PassRefPtr<StringImpl> StringImpl::create(const LChar* string)

272 {

273 if (!string)

274 return empty();

275 size_t length = strlen(reinterpret_cast<const char*>(string));

276 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());

277 return create(string, length);

278 }

279

280 const UChar* StringImpl::getData16SlowCase() const

281 {

282 if (has16BitShadow())

283 return m_copyData16;

284

285 if (bufferOwnership() == BufferSubstring) {

286 // If this is a substring, return a pointer into the parent string.

287 // TODO: Consider severing this string from the parent string

288 unsigned offset = m_data8 - m_substringBuffer->characters8();

289 return m_substringBuffer->characters() + offset;

290 }

291

292 STRING_STATS_ADD_UPCONVERTED_STRING(m_length);

293

294 unsigned len = length();

295 if (hasTerminatingNullCharacter())

296 ++len;

297

298 m_copyData16 = static_cast<UChar>(fastMalloc(len sizeof(UChar)));

299

300 m_hashAndFlags \|= s_hashFlagHas16BitShadow;

301

302 upconvertCharacters(0, len);

303

304 return m_copyData16;

305 }

306

307 void StringImpl::upconvertCharacters(unsigned start, unsigned end) const

308 {

309 ASSERT(is8Bit());

310 ASSERT(has16BitShadow());

311

312 for (size_t i = start; i < end; ++i)

313 m_copyData16[i] = m_data8[i];

314 }

315

316

317 bool StringImpl::containsOnlyWhitespace()

318 {

319 // FIXME: The definition of whitespace here includes a number of characters

320 // that are not whitespace from the point of view of RenderText; I wonder if

321 // that's a problem in practice.

322 if (is8Bit()) {

323 for (unsigned i = 0; i < m_length; ++i) {

324 UChar c = m_data8[i];

325 if (!isASCIISpace(c))

326 return false;

327 }

328

329 return true;

330 }

331

332 for (unsigned i = 0; i < m_length; ++i) {

333 UChar c = m_data16[i];

334 if (!isASCIISpace(c))

335 return false;

336 }

337 return true;

338 }

339

340 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)

341 {

342 if (start >= m_length)

343 return empty();

344 unsigned maxLength = m_length - start;

345 if (length >= maxLength) {

346 if (!start)

347 return this;

348 length = maxLength;

349 }

350 if (is8Bit())

351 return create(m_data8 + start, length);

352

353 return create(m_data16 + start, length);

354 }

355

356 UChar32 StringImpl::characterStartingAt(unsigned i)

357 {

358 if (is8Bit())

359 return m_data8[i];

360 if (U16_IS_SINGLE(m_data16[i]))

361 return m_data16[i];

362 if (i + 1 < m_length && U16_IS_LEAD(m_data16[i]) && U16_IS_TRAIL(m_data16[i + 1]))

363 return U16_GET_SUPPLEMENTARY(m_data16[i], m_data16[i + 1]);

364 return 0;

365 }

366

367 PassRefPtr<StringImpl> StringImpl::lower()

368 {

369 // Note: This is a hot function in the Dromaeo benchmark, specifically the

370 // no-op code path up through the first 'return' statement.

371

372 // First scan the string for uppercase and non-ASCII characters:

373 bool noUpper = true;

374 UChar ored = 0;

375 if (is8Bit()) {

376 const LChar* end = m_data8 + m_length;

377 for (const LChar* chp = m_data8; chp != end; ++chp) {

378 if (UNLIKELY(isASCIIUpper(*chp)))

379 noUpper = false;

380 ored \|= *chp;

381 }

382 // Nothing to do if the string is all ASCII with no uppercase.

383 if (noUpper && !(ored & ~0x7F))

384 return this;

385

386 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t> ::max()));

387 int32_t length = m_length;

388

389 LChar* data8;

390 RefPtr<StringImpl> newImpl = createUninitialized(length, data8);

391

392 if (!(ored & ~0x7F)) {

393 for (int32_t i = 0; i < length; ++i)

394 data8[i] = toASCIILower(m_data8[i]);

395

396 return newImpl.release();

397 }

398

399 // Do a slower implementation for cases that include non-ASCII Latin-1 c haracters.

400 for (int32_t i = 0; i < length; ++i)

401 data8[i] = static_cast<LChar>(Unicode::toLower(m_data8[i]));

402

403 return newImpl.release();

404 }

405

406 const UChar *end = m_data16 + m_length;

407 for (const UChar* chp = m_data16; chp != end; ++chp) {

408 if (UNLIKELY(isASCIIUpper(*chp)))

409 noUpper = false;

410 ored \|= *chp;

411 }

412 // Nothing to do if the string is all ASCII with no uppercase.

413 if (noUpper && !(ored & ~0x7F))

414 return this;

415

416 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma x()));

417 int32_t length = m_length;

418

419 if (!(ored & ~0x7F)) {

420 UChar* data16;

421 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);

422

423 for (int32_t i = 0; i < length; ++i) {

424 UChar c = m_data16[i];

425 data16[i] = toASCIILower(c);

426 }

427 return newImpl.release();

428 }

429

430 // Do a slower implementation for cases that include non-ASCII characters.

431 UChar* data16;

432 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);

433

434 bool error;

435 int32_t realLength = Unicode::toLower(data16, length, m_data16, m_length, &e rror);

436 if (!error && realLength == length)

437 return newImpl.release();

438

439 newImpl = createUninitialized(realLength, data16);

440 Unicode::toLower(data16, realLength, m_data16, m_length, &error);

441 if (error)

442 return this;

443 return newImpl.release();

444 }

445

446 PassRefPtr<StringImpl> StringImpl::upper()

447 {

448 // This function could be optimized for no-op cases the way lower() is,

449 // but in empirical testing, few actual calls to upper() are no-ops, so

450 // it wouldn't be worth the extra time for pre-scanning.

451

452 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma x()));

453 int32_t length = m_length;

454

455 if (is8Bit()) {

456 LChar* data8;

457 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);

458

459 // Do a faster loop for the case where all the characters are ASCII.

460 LChar ored = 0;

461 for (int i = 0; i < length; ++i) {

462 LChar c = m_data8[i];

463 ored \|= c;

464 data8[i] = toASCIIUpper(c);

465 }

466 if (!(ored & ~0x7F))

467 return newImpl.release();

468

469 // Do a slower implementation for cases that include non-ASCII Latin-1 c haracters.

470 int numberSharpSCharacters = 0;

471

472 // There are two special cases.

473 // 1. latin-1 characters when converted to upper case are 16 bit charac ters.

474 // 2. Lower case sharp-S converts to "SS" (two characters)

475 for (int32_t i = 0; i < length; ++i) {

476 LChar c = m_data8[i];

477 if (UNLIKELY(c == smallLetterSharpS))

478 ++numberSharpSCharacters;

479 UChar upper = Unicode::toUpper(c);

480 if (UNLIKELY(upper > 0xff)) {

481 // Since this upper-cased character does not fit in an 8-bit str ing, we need to take the 16-bit path.

482 goto upconvert;

483 }

484 data8[i] = static_cast<LChar>(upper);

485 }

486

487 if (!numberSharpSCharacters)

488 return newImpl.release();

489

490 // We have numberSSCharacters sharp-s characters, but none of the other special characters.

491 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);

492

493 LChar* dest = data8;

494

495 for (int32_t i = 0; i < length; ++i) {

496 LChar c = m_data8[i];

497 if (c == smallLetterSharpS) {

498 *dest++ = 'S';

499 *dest++ = 'S';

500 } else

501 *dest++ = static_cast<LChar>(Unicode::toUpper(c));

502 }

503

504 return newImpl.release();

505 }

506

507 upconvert:

508 const UChar* source16 = characters();

509

510 UChar* data16;

511 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);

512

513 // Do a faster loop for the case where all the characters are ASCII.

514 UChar ored = 0;

515 for (int i = 0; i < length; ++i) {

516 UChar c = source16[i];

517 ored \|= c;

518 data16[i] = toASCIIUpper(c);

519 }

520 if (!(ored & ~0x7F))

521 return newImpl.release();

522

523 // Do a slower implementation for cases that include non-ASCII characters.

524 bool error;

525 newImpl = createUninitialized(m_length, data16);

526 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &e rror);

527 if (!error && realLength == length)

528 return newImpl;

529 newImpl = createUninitialized(realLength, data16);

530 Unicode::toUpper(data16, realLength, source16, m_length, &error);

531 if (error)

532 return this;

533 return newImpl.release();

534 }

535

536 PassRefPtr<StringImpl> StringImpl::fill(UChar character)

537 {

538 if (!m_length)

539 return this;

540

541 if (!(character & ~0x7F)) {

542 LChar* data;

543 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

544 for (unsigned i = 0; i < m_length; ++i)

545 data[i] = character;

546 return newImpl.release();

547 }

548 UChar* data;

549 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

550 for (unsigned i = 0; i < m_length; ++i)

551 data[i] = character;

552 return newImpl.release();

553 }

554

555 PassRefPtr<StringImpl> StringImpl::foldCase()

556 {

557 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma x()));

558 int32_t length = m_length;

559

560 if (is8Bit()) {

561 // Do a faster loop for the case where all the characters are ASCII.

562 LChar* data;

563 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data);

564 LChar ored = 0;

565

566 for (int32_t i = 0; i < length; ++i) {

567 LChar c = m_data8[i];

568 data[i] = toASCIILower(c);

569 ored \|= c;

570 }

571

572 if (!(ored & ~0x7F))

573 return newImpl.release();

574

575 // Do a slower implementation for cases that include non-ASCII Latin-1 c haracters.

576 for (int32_t i = 0; i < length; ++i)

577 data[i] = static_cast<LChar>(Unicode::toLower(m_data8[i]));

578

579 return newImpl.release();

580 }

581

582 // Do a faster loop for the case where all the characters are ASCII.

583 UChar* data;

584 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

585 UChar ored = 0;

586 for (int32_t i = 0; i < length; ++i) {

587 UChar c = m_data16[i];

588 ored \|= c;

589 data[i] = toASCIILower(c);

590 }

591 if (!(ored & ~0x7F))

592 return newImpl.release();

593

594 // Do a slower implementation for cases that include non-ASCII characters.

595 bool error;

596 int32_t realLength = Unicode::foldCase(data, length, m_data16, m_length, &er ror);

597 if (!error && realLength == length)

598 return newImpl.release();

599 newImpl = createUninitialized(realLength, data);

600 Unicode::foldCase(data, realLength, m_data16, m_length, &error);

601 if (error)

602 return this;

603 return newImpl.release();

604 }

605

606 template <class UCharPredicate>

607 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)

608 {

609 if (!m_length)

610 return empty();

611

612 unsigned start = 0;

613 unsigned end = m_length - 1;

614

615 // skip white space from start

616 while (start <= end && predicate(is8Bit() ? m_data8[start] : m_data16[start] ))

617 ++start;

618

619 // only white space

620 if (start > end)

621 return empty();

622

623 // skip white space from end

624 while (end && predicate(is8Bit() ? m_data8[end] : m_data16[end]))

625 --end;

626

627 if (!start && end == m_length - 1)

628 return this;

629 if (is8Bit())

630 return create(m_data8 + start, end + 1 - start);

631 return create(m_data16 + start, end + 1 - start);

632 }

633

634 class UCharPredicate {

635 public:

636 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(functi on) { }

637

638 inline bool operator()(UChar ch) const

639 {

640 return m_function(ch);

641 }

642

643 private:

644 const CharacterMatchFunctionPtr m_function;

645 };

646

647 class SpaceOrNewlinePredicate {

648 public:

649 inline bool operator()(UChar ch) const

650 {

651 return isSpaceOrNewline(ch);

652 }

653 };

654

655 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()

656 {

657 return stripMatchedCharacters(SpaceOrNewlinePredicate());

658 }

659

660 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhi teSpace)

661 {

662 return stripMatchedCharacters(UCharPredicate(isWhiteSpace));

663 }

664

665 template <typename CharType>

666 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType * characters, CharacterMatchFunctionPtr findMatch)

667 {

668 const CharType* from = characters;

669 const CharType* fromend = from + m_length;

670

671 // Assume the common case will not remove any characters

672 while (from != fromend && !findMatch(*from))

673 ++from;

674 if (from == fromend)

675 return this;

676

677 StringBuffer<CharType> data(m_length);

678 CharType* to = data.characters();

679 unsigned outc = from - characters;

680

681 if (outc)

682 memcpy(to, characters, outc * sizeof(CharType));

683

684 while (true) {

685 while (from != fromend && findMatch(*from))

686 ++from;

687 while (from != fromend && !findMatch(*from))

688 to[outc++] = *from++;

689 if (from == fromend)

690 break;

691 }

692

693 data.shrink(outc);

694

695 return adopt(data);

696 }

697

698 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr fi ndMatch)

699 {

700 if (is8Bit())

701 return removeCharacters(characters8(), findMatch);

702 return removeCharacters(characters16(), findMatch);

703 }

704

705 template <typename CharType, class UCharPredicate>

706 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UChar Predicate predicate)

707 {

708 StringBuffer<CharType> data(m_length);

709

710 const CharType* from = getCharacters<CharType>();

711 const CharType* fromend = from + m_length;

712 int outc = 0;

713 bool changedToSpace = false;

714

715 CharType* to = data.characters();

716

717 while (true) {

718 while (from != fromend && predicate(*from)) {

719 if (*from != ' ')

720 changedToSpace = true;

721 ++from;

722 }

723 while (from != fromend && !predicate(*from))

724 to[outc++] = *from++;

725 if (from != fromend)

726 to[outc++] = ' ';

727 else

728 break;

729 }

730

731 if (outc > 0 && to[outc - 1] == ' ')

732 --outc;

733

734 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)

735 return this;

736

737 data.shrink(outc);

738

739 return adopt(data);

740 }

741

742 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()

743 {

744 if (is8Bit())

745 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlin ePredicate());

746 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePre dicate());

747 }

748

749 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr is WhiteSpace)

750 {

751 if (is8Bit())

752 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicat e(isWhiteSpace));

753 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(is WhiteSpace));

754 }

755

756 int StringImpl::toIntStrict(bool* ok, int base)

757 {

758 if (is8Bit())

759 return charactersToIntStrict(characters8(), m_length, ok, base);

760 return charactersToIntStrict(characters16(), m_length, ok, base);

761 }

762

763 unsigned StringImpl::toUIntStrict(bool* ok, int base)

764 {

765 if (is8Bit())

766 return charactersToUIntStrict(characters8(), m_length, ok, base);

767 return charactersToUIntStrict(characters16(), m_length, ok, base);

768 }

769

770 int64_t StringImpl::toInt64Strict(bool* ok, int base)

771 {

772 if (is8Bit())

773 return charactersToInt64Strict(characters8(), m_length, ok, base);

774 return charactersToInt64Strict(characters16(), m_length, ok, base);

775 }

776

777 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)

778 {

779 if (is8Bit())

780 return charactersToUInt64Strict(characters8(), m_length, ok, base);

781 return charactersToUInt64Strict(characters16(), m_length, ok, base);

782 }

783

784 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)

785 {

786 if (is8Bit())

787 return charactersToIntPtrStrict(characters8(), m_length, ok, base);

788 return charactersToIntPtrStrict(characters16(), m_length, ok, base);

789 }

790

791 int StringImpl::toInt(bool* ok)

792 {

793 if (is8Bit())

794 return charactersToInt(characters8(), m_length, ok);

795 return charactersToInt(characters16(), m_length, ok);

796 }

797

798 unsigned StringImpl::toUInt(bool* ok)

799 {

800 if (is8Bit())

801 return charactersToUInt(characters8(), m_length, ok);

802 return charactersToUInt(characters16(), m_length, ok);

803 }

804

805 int64_t StringImpl::toInt64(bool* ok)

806 {

807 if (is8Bit())

808 return charactersToInt64(characters8(), m_length, ok);

809 return charactersToInt64(characters16(), m_length, ok);

810 }

811

812 uint64_t StringImpl::toUInt64(bool* ok)

813 {

814 if (is8Bit())

815 return charactersToUInt64(characters8(), m_length, ok);

816 return charactersToUInt64(characters16(), m_length, ok);

817 }

818

819 intptr_t StringImpl::toIntPtr(bool* ok)

820 {

821 if (is8Bit())

822 return charactersToIntPtr(characters8(), m_length, ok);

823 return charactersToIntPtr(characters16(), m_length, ok);

824 }

825

826 double StringImpl::toDouble(bool* ok)

827 {

828 if (is8Bit())

829 return charactersToDouble(characters8(), m_length, ok);

830 return charactersToDouble(characters16(), m_length, ok);

831 }

832

833 float StringImpl::toFloat(bool* ok)

834 {

835 if (is8Bit())

836 return charactersToFloat(characters8(), m_length, ok);

837 return charactersToFloat(characters16(), m_length, ok);

838 }

839

840 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)

841 {

842 while (length--) {

843 LChar bc = *b++;

844 if (foldCase(*a++) != foldCase(bc))

845 return false;

846 }

847 return true;

848 }

849

850 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)

851 {

852 while (length--) {

853 LChar bc = *b++;

854 if (foldCase(*a++) != foldCase(bc))

855 return false;

856 }

857 return true;

858 }

859

860 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)

861 {

862 if (is8Bit())

863 return WTF::find(characters8(), m_length, matchFunction, start);

864 return WTF::find(characters16(), m_length, matchFunction, start);

865 }

866

867 size_t StringImpl::find(const LChar* matchString, unsigned index)

868 {

869 // Check for null or empty string to match against

870 if (!matchString)

871 return notFound;

872 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString) );

873 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());

874 unsigned matchLength = matchStringLength;

875 if (!matchLength)

876 return min(index, length());

877

878 // Optimization 1: fast case for strings of length 1.

879 if (matchLength == 1)

880 return WTF::find(characters16(), length(), *matchString, index);

881

882 // Check index & matchLength are in range.

883 if (index > length())

884 return notFound;

885 unsigned searchLength = length() - index;

886 if (matchLength > searchLength)

887 return notFound;

888 // delta is the number of additional times to test; delta == 0 means test on ly once.

889 unsigned delta = searchLength - matchLength;

890

891 const UChar* searchCharacters = characters() + index;

892

893 // Optimization 2: keep a running hash of the strings,

894 // only call equal if the hashes match.

895 unsigned searchHash = 0;

896 unsigned matchHash = 0;

897 for (unsigned i = 0; i < matchLength; ++i) {

898 searchHash += searchCharacters[i];

899 matchHash += matchString[i];

900 }

901

902 unsigned i = 0;

903 // keep looping until we match

904 while (searchHash != matchHash \|\| !equal(searchCharacters + i, matchString, matchLength)) {

905 if (i == delta)

906 return notFound;

907 searchHash += searchCharacters[i + matchLength];

908 searchHash -= searchCharacters[i];

909 ++i;

910 }

911 return index + i;

912 }

913

914 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)

915 {

916 // Check for null or empty string to match against

917 if (!matchString)

918 return notFound;

919 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString) );

920 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());

921 unsigned matchLength = matchStringLength;

922 if (!matchLength)

923 return min(index, length());

924

925 // Check index & matchLength are in range.

926 if (index > length())

927 return notFound;

928 unsigned searchLength = length() - index;

929 if (matchLength > searchLength)

930 return notFound;

931 // delta is the number of additional times to test; delta == 0 means test on ly once.

932 unsigned delta = searchLength - matchLength;

933

934 const UChar* searchCharacters = characters() + index;

935

936 unsigned i = 0;

937 // keep looping until we match

938 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {

939 if (i == delta)

940 return notFound;

941 ++i;

942 }

943 return index + i;

944 }

945

946 template <typename SearchCharacterType, typename MatchCharacterType>

947 ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacter s, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLen gth, unsigned matchLength)

948 {

949 // Optimization: keep a running hash of the strings,

950 // only call equal() if the hashes match.

951

952 // delta is the number of additional times to test; delta == 0 means test on ly once.

953 unsigned delta = searchLength - matchLength;

954

955 unsigned searchHash = 0;

956 unsigned matchHash = 0;

957

958 for (unsigned i = 0; i < matchLength; ++i) {

959 searchHash += searchCharacters[i];

960 matchHash += matchCharacters[i];

961 }

962

963 unsigned i = 0;

964 // keep looping until we match

965 while (searchHash != matchHash \|\| !equal(searchCharacters + i, matchCharacte rs, matchLength)) {

966 if (i == delta)

967 return notFound;

968 searchHash += searchCharacters[i + matchLength];

969 searchHash -= searchCharacters[i];

970 ++i;

971 }

972 return index + i;

973 }

974

975 size_t StringImpl::find(StringImpl* matchString)

976 {

977 // Check for null string to match against

978 if (UNLIKELY(!matchString))

979 return notFound;

980 unsigned matchLength = matchString->length();

981

982 // Optimization 1: fast case for strings of length 1.

983 if (matchLength == 1) {

984 if (is8Bit()) {

985 if (matchString->is8Bit())

986 return WTF::find(characters8(), length(), matchString->character s8()[0]);

987 return WTF::find(characters8(), length(), matchString->characters16( )[0]);

988 }

989 if (matchString->is8Bit())

990 return WTF::find(characters16(), length(), matchString->characters8( )[0]);

991 return WTF::find(characters16(), length(), matchString->characters16()[0 ]);

992 }

993

994 // Check matchLength is in range.

995 if (matchLength > length())

996 return notFound;

997

998 // Check for empty string to match against

999 if (UNLIKELY(!matchLength))

1000 return 0;

1001

1002 if (is8Bit()) {

1003 if (matchString->is8Bit())

1004 return findInner(characters8(), matchString->characters8(), 0, lengt h(), matchLength);

1005 return findInner(characters8(), matchString->characters16(), 0, length() , matchLength);

1006 }

1007

1008 if (matchString->is8Bit())

1009 return findInner(characters16(), matchString->characters8(), 0, length() , matchLength);

1010

1011 return findInner(characters16(), matchString->characters16(), 0, length(), m atchLength);

1012 }

1013

1014 size_t StringImpl::find(StringImpl* matchString, unsigned index)

1015 {

1016 // Check for null or empty string to match against

1017 if (UNLIKELY(!matchString))

1018 return notFound;

1019

1020 unsigned matchLength = matchString->length();

1021

1022 // Optimization 1: fast case for strings of length 1.

1023 if (matchLength == 1) {

1024 if (is8Bit())

1025 return WTF::find(characters8(), length(), (*matchString)[0], index);

1026 return WTF::find(characters16(), length(), (*matchString)[0], index);

1027 }

1028

1029 if (UNLIKELY(!matchLength))

1030 return min(index, length());

1031

1032 // Check index & matchLength are in range.

1033 if (index > length())

1034 return notFound;

1035 unsigned searchLength = length() - index;

1036 if (matchLength > searchLength)

1037 return notFound;

1038

1039 if (is8Bit()) {

1040 if (matchString->is8Bit())

1041 return findInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);

1042 return findInner(characters8() + index, matchString->characters16(), ind ex, searchLength, matchLength);

1043 }

1044

1045 if (matchString->is8Bit())

1046 return findInner(characters16() + index, matchString->characters8(), ind ex, searchLength, matchLength);

1047

1048 return findInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);

1049 }

1050

1051 template <typename SearchCharacterType, typename MatchCharacterType>

1052 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* sea rchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsign ed searchLength, unsigned matchLength)

1053 {

1054 // delta is the number of additional times to test; delta == 0 means test on ly once.

1055 unsigned delta = searchLength - matchLength;

1056

1057 unsigned i = 0;

1058 // keep looping until we match

1059 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength )) {

1060 if (i == delta)

1061 return notFound;

1062 ++i;

1063 }

1064 return index + i;

1065 }

1066

1067 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)

1068 {

1069 // Check for null or empty string to match against

1070 if (!matchString)

1071 return notFound;

1072 unsigned matchLength = matchString->length();

1073 if (!matchLength)

1074 return min(index, length());

1075

1076 // Check index & matchLength are in range.

1077 if (index > length())

1078 return notFound;

1079 unsigned searchLength = length() - index;

1080 if (matchLength > searchLength)

1081 return notFound;

1082

1083 if (is8Bit()) {

1084 if (matchString->is8Bit())

1085 return findIgnoringCaseInner(characters8() + index, matchString->cha racters8(), index, searchLength, matchLength);

1086 return findIgnoringCaseInner(characters8() + index, matchString->charact ers16(), index, searchLength, matchLength);

1087 }

1088

1089 if (matchString->is8Bit())

1090 return findIgnoringCaseInner(characters16() + index, matchString->charac ters8(), index, searchLength, matchLength);

1091

1092 return findIgnoringCaseInner(characters16() + index, matchString->characters 16(), index, searchLength, matchLength);

1093 }

1094

1095 size_t StringImpl::findNextLineStart(unsigned index)

1096 {

1097 if (is8Bit())

1098 return WTF::findNextLineStart(characters8(), m_length, index);

1099 return WTF::findNextLineStart(characters16(), m_length, index);

1100 }

1101

1102 size_t StringImpl::reverseFind(UChar c, unsigned index)

1103 {

1104 if (is8Bit())

1105 return WTF::reverseFind(characters8(), m_length, c, index);

1106 return WTF::reverseFind(characters16(), m_length, c, index);

1107 }

1108

1109 template <typename SearchCharacterType, typename MatchCharacterType>

1110 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCh aracters, const MatchCharacterType* matchCharacters, unsigned index, unsigned le ngth, unsigned matchLength)

1111 {

1112 // Optimization: keep a running hash of the strings,

1113 // only call equal if the hashes match.

1114

1115 // delta is the number of additional times to test; delta == 0 means test on ly once.

1116 unsigned delta = min(index, length - matchLength);

1117

1118 unsigned searchHash = 0;

1119 unsigned matchHash = 0;

1120 for (unsigned i = 0; i < matchLength; ++i) {

1121 searchHash += searchCharacters[delta + i];

1122 matchHash += matchCharacters[i];

1123 }

1124

1125 // keep looping until we match

1126 while (searchHash != matchHash \|\| !equal(searchCharacters + delta, matchChar acters, matchLength)) {

1127 if (!delta)

1128 return notFound;

1129 --delta;

1130 searchHash -= searchCharacters[delta + matchLength];

1131 searchHash += searchCharacters[delta];

1132 }

1133 return delta;

1134 }

1135

1136 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)

1137 {

1138 // Check for null or empty string to match against

1139 if (!matchString)

1140 return notFound;

1141 unsigned matchLength = matchString->length();

1142 unsigned ourLength = length();

1143 if (!matchLength)

1144 return min(index, ourLength);

1145

1146 // Optimization 1: fast case for strings of length 1.

1147 if (matchLength == 1) {

1148 if (is8Bit())

1149 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);

1150 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], in dex);

1151 }

1152

1153 // Check index & matchLength are in range.

1154 if (matchLength > ourLength)

1155 return notFound;

1156

1157 if (is8Bit()) {

1158 if (matchString->is8Bit())

1159 return reverseFindInner(characters8(), matchString->characters8(), i ndex, ourLength, matchLength);

1160 return reverseFindInner(characters8(), matchString->characters16(), inde x, ourLength, matchLength);

1161 }

1162

1163 if (matchString->is8Bit())

1164 return reverseFindInner(characters16(), matchString->characters8(), inde x, ourLength, matchLength);

1165

1166 return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);

1167 }

1168

1169 template <typename SearchCharacterType, typename MatchCharacterType>

1170 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterTy pe* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)

1171 {

1172 // delta is the number of additional times to test; delta == 0 means test on ly once.

1173 unsigned delta = min(index, length - matchLength);

1174

1175 // keep looping until we match

1176 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLe ngth)) {

1177 if (!delta)

1178 return notFound;

1179 --delta;

1180 }

1181 return delta;

1182 }

1183

1184 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned ind ex)

1185 {

1186 // Check for null or empty string to match against

1187 if (!matchString)

1188 return notFound;

1189 unsigned matchLength = matchString->length();

1190 unsigned ourLength = length();

1191 if (!matchLength)

1192 return min(index, ourLength);

1193

1194 // Check index & matchLength are in range.

1195 if (matchLength > ourLength)

1196 return notFound;

1197

1198 if (is8Bit()) {

1199 if (matchString->is8Bit())

1200 return reverseFindIgnoringCaseInner(characters8(), matchString->char acters8(), index, ourLength, matchLength);

1201 return reverseFindIgnoringCaseInner(characters8(), matchString->characte rs16(), index, ourLength, matchLength);

1202 }

1203

1204 if (matchString->is8Bit())

1205 return reverseFindIgnoringCaseInner(characters16(), matchString->charact ers8(), index, ourLength, matchLength);

1206

1207 return reverseFindIgnoringCaseInner(characters16(), matchString->characters1 6(), index, ourLength, matchLength);

1208 }

1209

1210 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned star tOffset, const char* matchString, unsigned matchLength, bool caseSensitive)

1211 {

1212 ASSERT(stringImpl);

1213 ASSERT(matchLength <= stringImpl->length());

1214 ASSERT(startOffset + matchLength <= stringImpl->length());

1215

1216 if (caseSensitive) {

1217 if (stringImpl->is8Bit())

1218 return equal(stringImpl->characters8() + startOffset, reinterpret_ca st<const LChar*>(matchString), matchLength);

1219 return equal(stringImpl->characters16() + startOffset, reinterpret_cast< const LChar*>(matchString), matchLength);

1220 }

1221 if (stringImpl->is8Bit())

1222 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinte rpret_cast<const LChar*>(matchString), matchLength);

1223 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpr et_cast<const LChar*>(matchString), matchLength);

1224 }

1225

1226 bool StringImpl::startsWith(UChar character) const

1227 {

1228 return m_length && (*this)[0] == character;

1229 }

1230

1231 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const

1232 {

1233 ASSERT(matchLength);

1234 if (matchLength > length())

1235 return false;

1236 return equalInner(this, 0, matchString, matchLength, caseSensitive);

1237 }

1238

1239 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)

1240 {

1241 ASSERT(matchString);

1242 if (m_length >= matchString->m_length) {

1243 unsigned start = m_length - matchString->m_length;

1244 return (caseSensitive ? find(matchString, start) : findIgnoringCase(matc hString, start)) == start;

1245 }

1246 return false;

1247 }

1248

1249 bool StringImpl::endsWith(UChar character) const

1250 {

1251 return m_length && (*this)[m_length - 1] == character;

1252 }

1253

1254 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool ca seSensitive) const

1255 {

1256 ASSERT(matchLength);

1257 if (matchLength > length())

1258 return false;

1259 unsigned startOffset = length() - matchLength;

1260 return equalInner(this, startOffset, matchString, matchLength, caseSensitive );

1261 }

1262

1263 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)

1264 {

1265 if (oldC == newC)

1266 return this;

1267 unsigned i;

1268 for (i = 0; i != m_length; ++i) {

1269 UChar c = is8Bit() ? m_data8[i] : m_data16[i];

1270 if (c == oldC)

1271 break;

1272 }

1273 if (i == m_length)

1274 return this;

1275

1276 if (is8Bit()) {

1277 if (oldC > 0xff)

1278 // Looking for a 16 bit char in an 8 bit string, we're done.

1279 return this;

1280

1281 if (newC <= 0xff) {

1282 LChar* data;

1283 LChar oldChar = static_cast<LChar>(oldC);

1284 LChar newChar = static_cast<LChar>(newC);

1285

1286 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

1287

1288 for (i = 0; i != m_length; ++i) {

1289 LChar ch = m_data8[i];

1290 if (ch == oldChar)

1291 ch = newChar;

1292 data[i] = ch;

1293 }

1294 return newImpl.release();

1295 }

1296

1297 // There is the possibility we need to up convert from 8 to 16 bit,

1298 // create a 16 bit string for the result.

1299 UChar* data;

1300 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

1301

1302 for (i = 0; i != m_length; ++i) {

1303 UChar ch = m_data8[i];

1304 if (ch == oldC)

1305 ch = newC;

1306 data[i] = ch;

1307 }

1308

1309 return newImpl.release();

1310 }

1311

1312 UChar* data;

1313 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

1314

1315 for (i = 0; i != m_length; ++i) {

1316 UChar ch = m_data16[i];

1317 if (ch == oldC)

1318 ch = newC;

1319 data[i] = ch;

1320 }

1321 return newImpl.release();

1322 }

1323

1324 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToR eplace, StringImpl* str)

1325 {

1326 position = min(position, length());

1327 lengthToReplace = min(lengthToReplace, length() - position);

1328 unsigned lengthToInsert = str ? str->length() : 0;

1329 if (!lengthToReplace && !lengthToInsert)

1330 return this;

1331

1332 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max () - lengthToInsert));

1333

1334 if (is8Bit() && (!str \|\| str->is8Bit())) {

1335 LChar* data;

1336 RefPtr<StringImpl> newImpl =

1337 createUninitialized(length() - lengthToReplace + lengthToInsert, data);

1338 memcpy(data, m_data8, position * sizeof(LChar));

1339 if (str)

1340 memcpy(data + position, str->m_data8, lengthToInsert * sizeof(LChar) );

1341 memcpy(data + position + lengthToInsert, m_data8 + position + lengthToRe place,

1342 (length() - position - lengthToReplace) * sizeof(LChar));

1343 return newImpl.release();

1344 }

1345 UChar* data;

1346 RefPtr<StringImpl> newImpl =

1347 createUninitialized(length() - lengthToReplace + lengthToInsert, data);

1348 if (is8Bit())

1349 for (unsigned i = 0; i < position; ++i)

1350 data[i] = m_data8[i];

1351 else

1352 memcpy(data, m_data16, position * sizeof(UChar));

1353 if (str) {

1354 if (str->is8Bit())

1355 for (unsigned i = 0; i < lengthToInsert; ++i)

1356 data[i + position] = str->m_data8[i];

1357 else

1358 memcpy(data + position, str->m_data16, lengthToInsert * sizeof(UChar ));

1359 }

1360 if (is8Bit()) {

1361 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)

1362 data[i + position + lengthToInsert] = m_data8[i + position + lengthT oReplace];

1363 } else {

1364 memcpy(data + position + lengthToInsert, characters() + position + lengt hToReplace,

1365 (length() - position - lengthToReplace) * sizeof(UChar));

1366 }

1367 return newImpl.release();

1368 }

1369

1370 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen t)

1371 {

1372 if (!replacement)

1373 return this;

1374

1375 if (replacement->is8Bit())

1376 return replace(pattern, replacement->m_data8, replacement->length());

1377

1378 return replace(pattern, replacement->m_data16, replacement->length());

1379 }

1380

1381 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replaceme nt, unsigned repStrLength)

1382 {

1383 ASSERT(replacement);

1384

1385 size_t srcSegmentStart = 0;

1386 unsigned matchCount = 0;

1387

1388 // Count the matches.

1389 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {

1390 ++matchCount;

1391 ++srcSegmentStart;

1392 }

1393

1394 // If we have 0 matches then we don't have to do any more work.

1395 if (!matchCount)

1396 return this;

1397

1398 RELEASE_ASSERT(!repStrLength \|\| matchCount <= numeric_limits<unsigned>::max( ) / repStrLength);

1399

1400 unsigned replaceSize = matchCount * repStrLength;

1401 unsigned newSize = m_length - matchCount;

1402 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));

1403

1404 newSize += replaceSize;

1405

1406 // Construct the new data.

1407 size_t srcSegmentEnd;

1408 unsigned srcSegmentLength;

1409 srcSegmentStart = 0;

1410 unsigned dstOffset = 0;

1411

1412 if (is8Bit()) {

1413 LChar* data;

1414 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);

1415

1416 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {

1417 srcSegmentLength = srcSegmentEnd - srcSegmentStart;

1418 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * sizeof(LChar));

1419 dstOffset += srcSegmentLength;

1420 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));

1421 dstOffset += repStrLength;

1422 srcSegmentStart = srcSegmentEnd + 1;

1423 }

1424

1425 srcSegmentLength = m_length - srcSegmentStart;

1426 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * s izeof(LChar));

1427

1428 ASSERT(dstOffset + srcSegmentLength == newImpl->length());

1429

1430 return newImpl.release();

1431 }

1432

1433 UChar* data;

1434 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);

1435

1436 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {

1437 srcSegmentLength = srcSegmentEnd - srcSegmentStart;

1438 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * sizeof(UChar));

1439

1440 dstOffset += srcSegmentLength;

1441 for (unsigned i = 0; i < repStrLength; ++i)

1442 data[i + dstOffset] = replacement[i];

1443

1444 dstOffset += repStrLength;

1445 srcSegmentStart = srcSegmentEnd + 1;

1446 }

1447

1448 srcSegmentLength = m_length - srcSegmentStart;

1449 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * size of(UChar));

1450

1451 ASSERT(dstOffset + srcSegmentLength == newImpl->length());

1452

1453 return newImpl.release();

1454 }

1455

1456 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replaceme nt, unsigned repStrLength)

1457 {

1458 ASSERT(replacement);

1459

1460 size_t srcSegmentStart = 0;

1461 unsigned matchCount = 0;

1462

1463 // Count the matches.

1464 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {

1465 ++matchCount;

1466 ++srcSegmentStart;

1467 }

1468

1469 // If we have 0 matches then we don't have to do any more work.

1470 if (!matchCount)

1471 return this;

1472

1473 RELEASE_ASSERT(!repStrLength \|\| matchCount <= numeric_limits<unsigned>::max( ) / repStrLength);

1474

1475 unsigned replaceSize = matchCount * repStrLength;

1476 unsigned newSize = m_length - matchCount;

1477 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));

1478

1479 newSize += replaceSize;

1480

1481 // Construct the new data.

1482 size_t srcSegmentEnd;

1483 unsigned srcSegmentLength;

1484 srcSegmentStart = 0;

1485 unsigned dstOffset = 0;

1486

1487 if (is8Bit()) {

1488 UChar* data;

1489 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);

1490

1491 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {

1492 srcSegmentLength = srcSegmentEnd - srcSegmentStart;

1493 for (unsigned i = 0; i < srcSegmentLength; ++i)

1494 data[i + dstOffset] = m_data8[i + srcSegmentStart];

1495

1496 dstOffset += srcSegmentLength;

1497 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));

1498

1499 dstOffset += repStrLength;

1500 srcSegmentStart = srcSegmentEnd + 1;

1501 }

1502

1503 srcSegmentLength = m_length - srcSegmentStart;

1504 for (unsigned i = 0; i < srcSegmentLength; ++i)

1505 data[i + dstOffset] = m_data8[i + srcSegmentStart];

1506

1507 ASSERT(dstOffset + srcSegmentLength == newImpl->length());

1508

1509 return newImpl.release();

1510 }

1511

1512 UChar* data;

1513 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);

1514

1515 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {

1516 srcSegmentLength = srcSegmentEnd - srcSegmentStart;

1517 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * sizeof(UChar));

1518

1519 dstOffset += srcSegmentLength;

1520 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));

1521

1522 dstOffset += repStrLength;

1523 srcSegmentStart = srcSegmentEnd + 1;

1524 }

1525

1526 srcSegmentLength = m_length - srcSegmentStart;

1527 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * size of(UChar));

1528

1529 ASSERT(dstOffset + srcSegmentLength == newImpl->length());

1530

1531 return newImpl.release();

1532 }

1533

1534 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl acement)

1535 {

1536 if (!pattern \|\| !replacement)

1537 return this;

1538

1539 unsigned patternLength = pattern->length();

1540 if (!patternLength)

1541 return this;

1542

1543 unsigned repStrLength = replacement->length();

1544 size_t srcSegmentStart = 0;

1545 unsigned matchCount = 0;

1546

1547 // Count the matches.

1548 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {

1549 ++matchCount;

1550 srcSegmentStart += patternLength;

1551 }

1552

1553 // If we have 0 matches, we don't have to do any more work

1554 if (!matchCount)

1555 return this;

1556

1557 unsigned newSize = m_length - matchCount * patternLength;

1558 RELEASE_ASSERT(!repStrLength \|\| matchCount <= numeric_limits<unsigned>::max( ) / repStrLength);

1559

1560 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * re pStrLength));

1561

1562 newSize += matchCount * repStrLength;

1563

1564

1565 // Construct the new data

1566 size_t srcSegmentEnd;

1567 unsigned srcSegmentLength;

1568 srcSegmentStart = 0;

1569 unsigned dstOffset = 0;

1570 bool srcIs8Bit = is8Bit();

1571 bool replacementIs8Bit = replacement->is8Bit();

1572

1573 // There are 4 cases:

1574 // 1. This and replacement are both 8 bit.

1575 // 2. This and replacement are both 16 bit.

1576 // 3. This is 8 bit and replacement is 16 bit.

1577 // 4. This is 16 bit and replacement is 8 bit.

1578 if (srcIs8Bit && replacementIs8Bit) {

1579 // Case 1

1580 LChar* data;

1581 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);

1582 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {

1583 srcSegmentLength = srcSegmentEnd - srcSegmentStart;

1584 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * sizeof(LChar));

1585 dstOffset += srcSegmentLength;

1586 memcpy(data + dstOffset, replacement->m_data8, repStrLength * sizeof (LChar));

1587 dstOffset += repStrLength;

1588 srcSegmentStart = srcSegmentEnd + patternLength;

1589 }

1590

1591 srcSegmentLength = m_length - srcSegmentStart;

1592 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * s izeof(LChar));

1593

1594 ASSERT(dstOffset + srcSegmentLength == newImpl->length());

1595

1596 return newImpl.release();

1597 }

1598

1599 UChar* data;

1600 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);

1601 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {

1602 srcSegmentLength = srcSegmentEnd - srcSegmentStart;

1603 if (srcIs8Bit) {

1604 // Case 3.

1605 for (unsigned i = 0; i < srcSegmentLength; ++i)

1606 data[i + dstOffset] = m_data8[i + srcSegmentStart];

1607 } else {

1608 // Case 2 & 4.

1609 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLengt h * sizeof(UChar));

1610 }

1611 dstOffset += srcSegmentLength;

1612 if (replacementIs8Bit) {

1613 // Cases 2 & 3.

1614 for (unsigned i = 0; i < repStrLength; ++i)

1615 data[i + dstOffset] = replacement->m_data8[i];

1616 } else {

1617 // Case 4

1618 memcpy(data + dstOffset, replacement->m_data16, repStrLength * sizeo f(UChar));

1619 }

1620 dstOffset += repStrLength;

1621 srcSegmentStart = srcSegmentEnd + patternLength;

1622 }

1623

1624 srcSegmentLength = m_length - srcSegmentStart;

1625 if (srcIs8Bit) {

1626 // Case 3.

1627 for (unsigned i = 0; i < srcSegmentLength; ++i)

1628 data[i + dstOffset] = m_data8[i + srcSegmentStart];

1629 } else {

1630 // Cases 2 & 4.

1631 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * sizeof(UChar));

1632 }

1633

1634 ASSERT(dstOffset + srcSegmentLength == newImpl->length());

1635

1636 return newImpl.release();

1637 }

1638

1639 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)

1640 {

1641 unsigned aLength = a->length();

1642 unsigned bLength = b->length();

1643 if (aLength != bLength)

1644 return false;

1645

1646 if (a->is8Bit()) {

1647 if (b->is8Bit())

1648 return equal(a->characters8(), b->characters8(), aLength);

1649

1650 return equal(a->characters8(), b->characters16(), aLength);

1651 }

1652

1653 if (b->is8Bit())

1654 return equal(a->characters16(), b->characters8(), aLength);

1655

1656 return equal(a->characters16(), b->characters16(), aLength);

1657 }

1658

1659 bool equal(const StringImpl* a, const StringImpl* b)

1660 {

1661 if (a == b)

1662 return true;

1663 if (!a \|\| !b)

1664 return false;

1665

1666 return stringImplContentEqual(a, b);

1667 }

1668

1669 bool equal(const StringImpl* a, const LChar* b, unsigned length)

1670 {

1671 if (!a)

1672 return !b;

1673 if (!b)

1674 return !a;

1675

1676 if (length != a->length())

1677 return false;

1678

1679 if (a->is8Bit())

1680 return equal(a->characters8(), b, length);

1681 return equal(a->characters16(), b, length);

1682 }

1683

1684 bool equal(const StringImpl* a, const LChar* b)

1685 {

1686 if (!a)

1687 return !b;

1688 if (!b)

1689 return !a;

1690

1691 unsigned length = a->length();

1692

1693 if (a->is8Bit()) {

1694 const LChar* aPtr = a->characters8();

1695 for (unsigned i = 0; i != length; ++i) {

1696 LChar bc = b[i];

1697 LChar ac = aPtr[i];

1698 if (!bc)

1699 return false;

1700 if (ac != bc)

1701 return false;

1702 }

1703

1704 return !b[length];

1705 }

1706

1707 const UChar* aPtr = a->characters16();

1708 for (unsigned i = 0; i != length; ++i) {

1709 LChar bc = b[i];

1710 if (!bc)

1711 return false;

1712 if (aPtr[i] != bc)

1713 return false;

1714 }

1715

1716 return !b[length];

1717 }

1718

1719 bool equal(const StringImpl* a, const UChar* b, unsigned length)

1720 {

1721 if (!a)

1722 return !b;

1723 if (!b)

1724 return false;

1725

1726 if (a->length() != length)

1727 return false;

1728 if (a->is8Bit())

1729 return equal(a->characters8(), b, length);

1730 return equal(a->characters16(), b, length);

1731 }

1732

1733 bool equalNonNull(const StringImpl* a, const StringImpl* b)

1734 {

1735 ASSERT(a && b);

1736 if (a == b)

1737 return true;

1738

1739 return stringImplContentEqual(a, b);

1740 }

1741

1742 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)

1743 {

1744 if (a == b)

1745 return true;

1746 if (!a \|\| !b)

1747 return false;

1748

1749 return CaseFoldingHash::equal(a, b);

1750 }

1751

1752 bool equalIgnoringCase(const StringImpl* a, const LChar* b)

1753 {

1754 if (!a)

1755 return !b;

1756 if (!b)

1757 return !a;

1758

1759 unsigned length = a->length();

1760

1761 // Do a faster loop for the case where all the characters are ASCII.

1762 UChar ored = 0;

1763 bool equal = true;

1764 if (a->is8Bit()) {

1765 const LChar* as = a->characters8();

1766 for (unsigned i = 0; i != length; ++i) {

1767 LChar bc = b[i];

1768 if (!bc)

1769 return false;

1770 UChar ac = as[i];

1771 ored \|= ac;

1772 equal = equal && (toASCIILower(ac) == toASCIILower(bc));

1773 }

1774

1775 // Do a slower implementation for cases that include non-ASCII character s.

1776 if (ored & ~0x7F) {

1777 equal = true;

1778 for (unsigned i = 0; i != length; ++i)

1779 equal = equal && (foldCase(as[i]) == foldCase(b[i]));

1780 }

1781

1782 return equal && !b[length];

1783 }

1784

1785 const UChar* as = a->characters16();

1786 for (unsigned i = 0; i != length; ++i) {

1787 LChar bc = b[i];

1788 if (!bc)

1789 return false;

1790 UChar ac = as[i];

1791 ored \|= ac;

1792 equal = equal && (toASCIILower(ac) == toASCIILower(bc));

1793 }

1794

1795 // Do a slower implementation for cases that include non-ASCII characters.

1796 if (ored & ~0x7F) {

1797 equal = true;

1798 for (unsigned i = 0; i != length; ++i) {

1799 equal = equal && (foldCase(as[i]) == foldCase(b[i]));

1800 }

1801 }

1802

1803 return equal && !b[length];

1804 }

1805

1806 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)

1807 {

1808 ASSERT(a && b);

1809 if (a == b)

1810 return true;

1811

1812 unsigned length = a->length();

1813 if (length != b->length())

1814 return false;

1815

1816 if (a->is8Bit()) {

1817 if (b->is8Bit())

1818 return equalIgnoringCase(a->characters8(), b->characters8(), length) ;

1819

1820 return equalIgnoringCase(b->characters16(), a->characters8(), length);

1821 }

1822

1823 if (b->is8Bit())

1824 return equalIgnoringCase(a->characters16(), b->characters8(), length);

1825

1826 return equalIgnoringCase(a->characters16(), b->characters16(), length);

1827 }

1828

1829 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)

1830 {

1831 if (!a && b && !b->length())

1832 return true;

1833 if (!b && a && !a->length())

1834 return true;

1835 return equal(a, b);

1836 }

1837

1838 WTF::Unicode::Direction StringImpl::defaultWritingDirection(bool* hasStrongDirec tionality)

1839 {

1840 for (unsigned i = 0; i < m_length; ++i) {

1841 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(is8Bit() ? m_data8[i] : m_data16[i]);

1842 if (charDirection == WTF::Unicode::LeftToRight) {

1843 if (hasStrongDirectionality)

1844 *hasStrongDirectionality = true;

1845 return WTF::Unicode::LeftToRight;

1846 }

1847 if (charDirection == WTF::Unicode::RightToLeft \|\| charDirection == WTF:: Unicode::RightToLeftArabic) {

1848 if (hasStrongDirectionality)

1849 *hasStrongDirectionality = true;

1850 return WTF::Unicode::RightToLeft;

1851 }

1852 }

1853 if (hasStrongDirectionality)

1854 *hasStrongDirectionality = false;

1855 return WTF::Unicode::LeftToRight;

1856 }

1857

1858 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer<LChar>& buffer)

1859 {

1860 unsigned length = buffer.length();

1861 if (!length)

1862 return empty();

1863 return adoptRef(new StringImpl(buffer.release(), length));

1864 }

1865

1866 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer<UChar>& buffer)

1867 {

1868 unsigned length = buffer.length();

1869 if (!length)

1870 return empty();

1871 return adoptRef(new StringImpl(buffer.release(), length));

1872 }

1873

1874 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const Stri ngImpl& string)

1875 {

1876 // Use createUninitialized instead of 'new StringImpl' so that the string an d its buffer

1877 // get allocated in a single memory block.

1878 unsigned length = string.m_length;

1879 RELEASE_ASSERT(length < numeric_limits<unsigned>::max());

1880 RefPtr<StringImpl> terminatedString;

1881 if (string.is8Bit()) {

1882 LChar* data;

1883 terminatedString = createUninitialized(length + 1, data);

1884 memcpy(data, string.m_data8, length * sizeof(LChar));

1885 data[length] = 0;

1886 } else {

1887 UChar* data;

1888 terminatedString = createUninitialized(length + 1, data);

1889 memcpy(data, string.m_data16, length * sizeof(UChar));

1890 data[length] = 0;

1891 }

1892 --(terminatedString->m_length);

1893 terminatedString->m_hashAndFlags = (string.m_hashAndFlags & (~s_flagMask \| s _hashFlag8BitBuffer)) \| s_hashFlagHasTerminatingNullCharacter;

1894 return terminatedString.release();

1895 }

1896

1897 size_t StringImpl::sizeInBytes() const

1898 {

1899 // FIXME: support substrings

1900 size_t size = length();

1901 if (is8Bit()) {

1902 if (has16BitShadow()) {

1903 size += 2 * size;

1904 if (hasTerminatingNullCharacter())

1905 size += 2;

1906 }

1907 } else

1908 size *= 2;

1909 return size + sizeof(*this);

1910 }

1911

1912 } // namespace WTF

OLD	NEW

« no previous file with comments | « Source/WTF/wtf/text/StringImpl.h ('k') | Source/WTF/wtf/text/StringOperators.h » ('j') | Source/config.h » ('J')