Source/WTF/wtf/text/WTFString.cpp - Issue 14238015: Move Source/WTF/wtf to Source/wtf

Side by Side Diff: Source/WTF/wtf/text/WTFString.cpp

Issue 14238015: Move Source/WTF/wtf to Source/wtf (Closed) Base URL: svn://svn.chromium.org/blink/trunk

Patch Set: Created 7 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 /*

2 * (C) 1999 Lars Knoll (knoll@kde.org)

3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights reserved.

4 * Copyright (C) 2007-2009 Torch Mobile, Inc.

5 *

6 * This library is free software; you can redistribute it and/or

7 * modify it under the terms of the GNU Library General Public

8 * License as published by the Free Software Foundation; either

9 * version 2 of the License, or (at your option) any later version.

10 *

11 * This library is distributed in the hope that it will be useful,

12 * but WITHOUT ANY WARRANTY; without even the implied warranty of

13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

14 * Library General Public License for more details.

15 *

16 * You should have received a copy of the GNU Library General Public License

17 * along with this library; see the file COPYING.LIB. If not, write to

18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,

19 * Boston, MA 02110-1301, USA.

20 */

21

22 #include "config.h"

23 #include "WTFString.h"

24

25 #include "IntegerToStringConversion.h"

26 #include <stdarg.h>

27 #include <wtf/ASCIICType.h>

28 #include <wtf/DataLog.h>

29 #include <wtf/HexNumber.h>

30 #include <wtf/MathExtras.h>

31 #include <wtf/text/CString.h>

32 #include <wtf/StringExtras.h>

33 #include <wtf/Vector.h>

34 #include <wtf/dtoa.h>

35 #include <wtf/unicode/CharacterNames.h>

36 #include <wtf/unicode/UTF8.h>

37 #include <wtf/unicode/Unicode.h>

38

39 using namespace std;

40

41 namespace WTF {

42

43 using namespace Unicode;

44 using namespace std;

45

46 // Construct a string with UTF-16 data.

47 String::String(const UChar* characters, unsigned length)

48 : m_impl(characters ? StringImpl::create(characters, length) : 0)

49 {

50 }

51

52 // Construct a string with UTF-16 data, from a null-terminated source.

53 String::String(const UChar* str)

54 {

55 if (!str)

56 return;

57

58 size_t len = 0;

59 while (str[len] != UChar(0))

60 ++len;

61

62 RELEASE_ASSERT(len <= numeric_limits<unsigned>::max());

63

64 m_impl = StringImpl::create(str, len);

65 }

66

67 // Construct a string with latin1 data.

68 String::String(const LChar* characters, unsigned length)

69 : m_impl(characters ? StringImpl::create(characters, length) : 0)

70 {

71 }

72

73 String::String(const char* characters, unsigned length)

74 : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(char acters), length) : 0)

75 {

76 }

77

78 // Construct a string with latin1 data, from a null-terminated source.

79 String::String(const LChar* characters)

80 : m_impl(characters ? StringImpl::create(characters) : 0)

81 {

82 }

83

84 String::String(const char* characters)

85 : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(char acters)) : 0)

86 {

87 }

88

89 String::String(ASCIILiteral characters)

90 : m_impl(StringImpl::createFromLiteral(characters))

91 {

92 }

93

94 void String::append(const String& str)

95 {

96 if (str.isEmpty())

97 return;

98

99 // FIXME: This is extremely inefficient. So much so that we might want to ta ke this

100 // out of String's API. We can make it better by optimizing the case where e xactly

101 // one String is pointing at this StringImpl, but even then it's going to re quire a

102 // call to fastMalloc every single time.

103 if (str.m_impl) {

104 if (m_impl) {

105 if (m_impl->is8Bit() && str.m_impl->is8Bit()) {

106 LChar* data;

107 RELEASE_ASSERT(str.length() <= numeric_limits<unsigned>::max() - m_impl->length());

108 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_i mpl->length() + str.length(), data);

109 memcpy(data, m_impl->characters8(), m_impl->length() * sizeof(LC har));

110 memcpy(data + m_impl->length(), str.characters8(), str.length() * sizeof(LChar));

111 m_impl = newImpl.release();

112 return;

113 }

114 UChar* data;

115 RELEASE_ASSERT(str.length() <= numeric_limits<unsigned>::max() - m_i mpl->length());

116 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl- >length() + str.length(), data);

117 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)) ;

118 memcpy(data + m_impl->length(), str.characters(), str.length() * siz eof(UChar));

119 m_impl = newImpl.release();

120 } else

121 m_impl = str.m_impl;

122 }

123 }

124

125 void String::append(LChar c)

126 {

127 // FIXME: This is extremely inefficient. So much so that we might want to ta ke this

128 // out of String's API. We can make it better by optimizing the case where e xactly

129 // one String is pointing at this StringImpl, but even then it's going to re quire a

130 // call to fastMalloc every single time.

131 if (m_impl) {

132 UChar* data;

133 RELEASE_ASSERT(m_impl->length() < numeric_limits<unsigned>::max());

134 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->len gth() + 1, data);

135 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));

136 data[m_impl->length()] = c;

137 m_impl = newImpl.release();

138 } else

139 m_impl = StringImpl::create(&c, 1);

140 }

141

142 void String::append(UChar c)

143 {

144 // FIXME: This is extremely inefficient. So much so that we might want to ta ke this

145 // out of String's API. We can make it better by optimizing the case where e xactly

146 // one String is pointing at this StringImpl, but even then it's going to re quire a

147 // call to fastMalloc every single time.

148 if (m_impl) {

149 UChar* data;

150 RELEASE_ASSERT(m_impl->length() < numeric_limits<unsigned>::max());

151 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->len gth() + 1, data);

152 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));

153 data[m_impl->length()] = c;

154 m_impl = newImpl.release();

155 } else

156 m_impl = StringImpl::create(&c, 1);

157 }

158

159 int codePointCompare(const String& a, const String& b)

160 {

161 return codePointCompare(a.impl(), b.impl());

162 }

163

164 void String::insert(const String& str, unsigned pos)

165 {

166 if (str.isEmpty()) {

167 if (str.isNull())

168 return;

169 if (isNull())

170 m_impl = str.impl();

171 return;

172 }

173 insert(str.characters(), str.length(), pos);

174 }

175

176 void String::append(const LChar* charactersToAppend, unsigned lengthToAppend)

177 {

178 if (!m_impl) {

179 if (!charactersToAppend)

180 return;

181 m_impl = StringImpl::create(charactersToAppend, lengthToAppend);

182 return;

183 }

184

185 if (!lengthToAppend)

186 return;

187

188 ASSERT(charactersToAppend);

189

190 unsigned strLength = m_impl->length();

191

192 if (m_impl->is8Bit()) {

193 RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLe ngth);

194 LChar* data;

195 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);

196 StringImpl::copyChars(data, m_impl->characters8(), strLength);

197 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppe nd);

198 m_impl = newImpl.release();

199 return;

200 }

201

202 RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength );

203 UChar* data;

204 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + leng thToAppend, data);

205 StringImpl::copyChars(data, m_impl->characters16(), strLength);

206 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);

207 m_impl = newImpl.release();

208 }

209

210 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)

211 {

212 if (!m_impl) {

213 if (!charactersToAppend)

214 return;

215 m_impl = StringImpl::create(charactersToAppend, lengthToAppend);

216 return;

217 }

218

219 if (!lengthToAppend)

220 return;

221

222 unsigned strLength = m_impl->length();

223

224 ASSERT(charactersToAppend);

225 RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength );

226 UChar* data;

227 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + len gthToAppend, data);

228 if (m_impl->is8Bit())

229 StringImpl::copyChars(data, characters8(), strLength);

230 else

231 StringImpl::copyChars(data, characters16(), strLength);

232 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);

233 m_impl = newImpl.release();

234 }

235

236

237 void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, un signed position)

238 {

239 if (position >= length()) {

240 append(charactersToInsert, lengthToInsert);

241 return;

242 }

243

244 ASSERT(m_impl);

245

246 if (!lengthToInsert)

247 return;

248

249 ASSERT(charactersToInsert);

250 UChar* data;

251 RELEASE_ASSERT(lengthToInsert <= numeric_limits<unsigned>::max() - length()) ;

252 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + leng thToInsert, data);

253 memcpy(data, characters(), position * sizeof(UChar));

254 memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar));

255 memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar));

256 m_impl = newImpl.release();

257 }

258

259 UChar32 String::characterStartingAt(unsigned i) const

260 {

261 if (!m_impl \|\| i >= m_impl->length())

262 return 0;

263 return m_impl->characterStartingAt(i);

264 }

265

266 void String::truncate(unsigned position)

267 {

268 if (position >= length())

269 return;

270 UChar* data;

271 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data) ;

272 memcpy(data, characters(), position * sizeof(UChar));

273 m_impl = newImpl.release();

274 }

275

276 template <typename CharacterType>

277 inline void String::removeInternal(const CharacterType* characters, unsigned pos ition, int lengthToRemove)

278 {

279 CharacterType* data;

280 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - leng thToRemove, data);

281 memcpy(data, characters, position * sizeof(CharacterType));

282 memcpy(data + position, characters + position + lengthToRemove,

283 (length() - lengthToRemove - position) * sizeof(CharacterType));

284

285 m_impl = newImpl.release();

286 }

287

288 void String::remove(unsigned position, int lengthToRemove)

289 {

290 if (lengthToRemove <= 0)

291 return;

292 if (position >= length())

293 return;

294 if (static_cast<unsigned>(lengthToRemove) > length() - position)

295 lengthToRemove = length() - position;

296

297 if (is8Bit()) {

298 removeInternal(characters8(), position, lengthToRemove);

299

300 return;

301 }

302

303 removeInternal(characters16(), position, lengthToRemove);

304 }

305

306 String String::substring(unsigned pos, unsigned len) const

307 {

308 if (!m_impl)

309 return String();

310 return m_impl->substring(pos, len);

311 }

312

313 String String::substringSharingImpl(unsigned offset, unsigned length) const

314 {

315 // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UC har).

316

317 unsigned stringLength = this->length();

318 offset = min(offset, stringLength);

319 length = min(length, stringLength - offset);

320

321 if (!offset && length == stringLength)

322 return *this;

323 return String(StringImpl::create(m_impl, offset, length));

324 }

325

326 String String::lower() const

327 {

328 if (!m_impl)

329 return String();

330 return m_impl->lower();

331 }

332

333 String String::upper() const

334 {

335 if (!m_impl)

336 return String();

337 return m_impl->upper();

338 }

339

340 String String::stripWhiteSpace() const

341 {

342 if (!m_impl)

343 return String();

344 return m_impl->stripWhiteSpace();

345 }

346

347 String String::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const

348 {

349 if (!m_impl)

350 return String();

351 return m_impl->stripWhiteSpace(isWhiteSpace);

352 }

353

354 String String::simplifyWhiteSpace() const

355 {

356 if (!m_impl)

357 return String();

358 return m_impl->simplifyWhiteSpace();

359 }

360

361 String String::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const

362 {

363 if (!m_impl)

364 return String();

365 return m_impl->simplifyWhiteSpace(isWhiteSpace);

366 }

367

368 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const

369 {

370 if (!m_impl)

371 return String();

372 return m_impl->removeCharacters(findMatch);

373 }

374

375 String String::foldCase() const

376 {

377 if (!m_impl)

378 return String();

379 return m_impl->foldCase();

380 }

381

382 bool String::percentage(int& result) const

383 {

384 if (!m_impl \|\| !m_impl->length())

385 return false;

386

387 if ((*m_impl)[m_impl->length() - 1] != '%')

388 return false;

389

390 result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1);

391 return true;

392 }

393

394 const UChar* String::charactersWithNullTermination()

395 {

396 if (!m_impl)

397 return 0;

398 if (m_impl->hasTerminatingNullCharacter())

399 return m_impl->characters();

400 m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl);

401 return m_impl->characters();

402 }

403

404 String String::format(const char *format, ...)

405 {

406 #if OS(WINCE)

407 va_list args;

408 va_start(args, format);

409

410 Vector<char, 256> buffer;

411

412 int bufferSize = 256;

413 buffer.resize(bufferSize);

414 for (;;) {

415 int written = vsnprintf(buffer.data(), bufferSize, format, args);

416 va_end(args);

417

418 if (written == 0)

419 return String("");

420 if (written > 0)

421 return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data ()), written);

422

423 bufferSize <<= 1;

424 buffer.resize(bufferSize);

425 va_start(args, format);

426 }

427

428 #else

429 va_list args;

430 va_start(args, format);

431

432 Vector<char, 256> buffer;

433

434 // Do the format once to get the length.

435 #if COMPILER(MSVC)

436 int result = _vscprintf(format, args);

437 #else

438 char ch;

439 int result = vsnprintf(&ch, 1, format, args);

440 // We need to call va_end() and then va_start() again here, as the

441 // contents of args is undefined after the call to vsnprintf

442 // according to http://man.cx/snprintf(3)

443 //

444 // Not calling va_end/va_start here happens to work on lots of

445 // systems, but fails e.g. on 64bit Linux.

446 va_end(args);

447 va_start(args, format);

448 #endif

449

450 if (result == 0)

451 return String("");

452 if (result < 0)

453 return String();

454 unsigned len = result;

455 buffer.grow(len + 1);

456

457 // Now do the formatting again, guaranteed to fit.

458 vsnprintf(buffer.data(), buffer.size(), format, args);

459

460 va_end(args);

461

462 return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len );

463 #endif

464 }

465

466 String String::number(int number)

467 {

468 return numberToStringSigned<String>(number);

469 }

470

471 String String::number(unsigned int number)

472 {

473 return numberToStringUnsigned<String>(number);

474 }

475

476 String String::number(long number)

477 {

478 return numberToStringSigned<String>(number);

479 }

480

481 String String::number(unsigned long number)

482 {

483 return numberToStringUnsigned<String>(number);

484 }

485

486 String String::number(long long number)

487 {

488 return numberToStringSigned<String>(number);

489 }

490

491 String String::number(unsigned long long number)

492 {

493 return numberToStringUnsigned<String>(number);

494 }

495

496 String String::number(double number, unsigned precision, TrailingZerosTruncating Policy trailingZerosTruncatingPolicy)

497 {

498 NumberToStringBuffer buffer;

499 return String(numberToFixedPrecisionString(number, precision, buffer, traili ngZerosTruncatingPolicy == TruncateTrailingZeros));

500 }

501

502 String String::numberToStringECMAScript(double number)

503 {

504 NumberToStringBuffer buffer;

505 return String(numberToString(number, buffer));

506 }

507

508 String String::numberToStringFixedWidth(double number, unsigned decimalPlaces)

509 {

510 NumberToStringBuffer buffer;

511 return String(numberToFixedWidthString(number, decimalPlaces, buffer));

512 }

513

514 int String::toIntStrict(bool* ok, int base) const

515 {

516 if (!m_impl) {

517 if (ok)

518 *ok = false;

519 return 0;

520 }

521 return m_impl->toIntStrict(ok, base);

522 }

523

524 unsigned String::toUIntStrict(bool* ok, int base) const

525 {

526 if (!m_impl) {

527 if (ok)

528 *ok = false;

529 return 0;

530 }

531 return m_impl->toUIntStrict(ok, base);

532 }

533

534 int64_t String::toInt64Strict(bool* ok, int base) const

535 {

536 if (!m_impl) {

537 if (ok)

538 *ok = false;

539 return 0;

540 }

541 return m_impl->toInt64Strict(ok, base);

542 }

543

544 uint64_t String::toUInt64Strict(bool* ok, int base) const

545 {

546 if (!m_impl) {

547 if (ok)

548 *ok = false;

549 return 0;

550 }

551 return m_impl->toUInt64Strict(ok, base);

552 }

553

554 intptr_t String::toIntPtrStrict(bool* ok, int base) const

555 {

556 if (!m_impl) {

557 if (ok)

558 *ok = false;

559 return 0;

560 }

561 return m_impl->toIntPtrStrict(ok, base);

562 }

563

564 int String::toInt(bool* ok) const

565 {

566 if (!m_impl) {

567 if (ok)

568 *ok = false;

569 return 0;

570 }

571 return m_impl->toInt(ok);

572 }

573

574 unsigned String::toUInt(bool* ok) const

575 {

576 if (!m_impl) {

577 if (ok)

578 *ok = false;

579 return 0;

580 }

581 return m_impl->toUInt(ok);

582 }

583

584 int64_t String::toInt64(bool* ok) const

585 {

586 if (!m_impl) {

587 if (ok)

588 *ok = false;

589 return 0;

590 }

591 return m_impl->toInt64(ok);

592 }

593

594 uint64_t String::toUInt64(bool* ok) const

595 {

596 if (!m_impl) {

597 if (ok)

598 *ok = false;

599 return 0;

600 }

601 return m_impl->toUInt64(ok);

602 }

603

604 intptr_t String::toIntPtr(bool* ok) const

605 {

606 if (!m_impl) {

607 if (ok)

608 *ok = false;

609 return 0;

610 }

611 return m_impl->toIntPtr(ok);

612 }

613

614 double String::toDouble(bool* ok) const

615 {

616 if (!m_impl) {

617 if (ok)

618 *ok = false;

619 return 0.0;

620 }

621 return m_impl->toDouble(ok);

622 }

623

624 float String::toFloat(bool* ok) const

625 {

626 if (!m_impl) {

627 if (ok)

628 *ok = false;

629 return 0.0f;

630 }

631 return m_impl->toFloat(ok);

632 }

633

634 String String::isolatedCopy() const

635 {

636 if (!m_impl)

637 return String();

638 return m_impl->isolatedCopy();

639 }

640

641 bool String::isSafeToSendToAnotherThread() const

642 {

643 if (!impl())

644 return true;

645 // AtomicStrings are not safe to send between threads as ~StringImpl()

646 // will try to remove them from the wrong AtomicStringTable.

647 if (impl()->isAtomic())

648 return false;

649 if (impl()->hasOneRef())

650 return true;

651 if (isEmpty())

652 return true;

653 return false;

654 }

655

656 void String::split(const String& separator, bool allowEmptyEntries, Vector<Strin g>& result) const

657 {

658 result.clear();

659

660 unsigned startPos = 0;

661 size_t endPos;

662 while ((endPos = find(separator, startPos)) != notFound) {

663 if (allowEmptyEntries \|\| startPos != endPos)

664 result.append(substring(startPos, endPos - startPos));

665 startPos = endPos + separator.length();

666 }

667 if (allowEmptyEntries \|\| startPos != length())

668 result.append(substring(startPos));

669 }

670

671 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& resu lt) const

672 {

673 result.clear();

674

675 unsigned startPos = 0;

676 size_t endPos;

677 while ((endPos = find(separator, startPos)) != notFound) {

678 if (allowEmptyEntries \|\| startPos != endPos)

679 result.append(substring(startPos, endPos - startPos));

680 startPos = endPos + 1;

681 }

682 if (allowEmptyEntries \|\| startPos != length())

683 result.append(substring(startPos));

684 }

685

686 CString String::ascii() const

687 {

688 // Printable ASCII characters 32..127 and the null character are

689 // preserved, characters outside of this range are converted to '?'.

690

691 unsigned length = this->length();

692 if (!length) {

693 char* characterBuffer;

694 return CString::newUninitialized(length, characterBuffer);

695 }

696

697 if (this->is8Bit()) {

698 const LChar* characters = this->characters8();

699

700 char* characterBuffer;

701 CString result = CString::newUninitialized(length, characterBuffer);

702

703 for (unsigned i = 0; i < length; ++i) {

704 LChar ch = characters[i];

705 characterBuffer[i] = ch && (ch < 0x20 \|\| ch > 0x7f) ? '?' : ch;

706 }

707

708 return result;

709 }

710

711 const UChar* characters = this->characters16();

712

713 char* characterBuffer;

714 CString result = CString::newUninitialized(length, characterBuffer);

715

716 for (unsigned i = 0; i < length; ++i) {

717 UChar ch = characters[i];

718 characterBuffer[i] = ch && (ch < 0x20 \|\| ch > 0x7f) ? '?' : ch;

719 }

720

721 return result;

722 }

723

724 CString String::latin1() const

725 {

726 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are

727 // preserved, characters outside of this range are converted to '?'.

728

729 unsigned length = this->length();

730

731 if (!length)

732 return CString("", 0);

733

734 if (is8Bit())

735 return CString(reinterpret_cast<const char*>(this->characters8()), lengt h);

736

737 const UChar* characters = this->characters16();

738

739 char* characterBuffer;

740 CString result = CString::newUninitialized(length, characterBuffer);

741

742 for (unsigned i = 0; i < length; ++i) {

743 UChar ch = characters[i];

744 characterBuffer[i] = ch > 0xff ? '?' : ch;

745 }

746

747 return result;

748 }

749

750 // Helper to write a three-byte UTF-8 code point to the buffer, caller must chec k room is available.

751 static inline void putUTF8Triple(char*& buffer, UChar ch)

752 {

753 ASSERT(ch >= 0x0800);

754 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) \| 0xE0);

755 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) \| 0x80);

756 *buffer++ = static_cast<char>((ch & 0x3F) \| 0x80);

757 }

758

759 CString String::utf8(ConversionMode mode) const

760 {

761 unsigned length = this->length();

762

763 if (!length)

764 return CString("", 0);

765

766 // Allocate a buffer big enough to hold all the characters

767 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).

768 // Optimization ideas, if we find this function is hot:

769 // * We could speculatively create a CStringBuffer to contain 'length'

770 // characters, and resize if necessary (i.e. if the buffer contains

771 // non-ascii characters). (Alternatively, scan the buffer first for

772 // ascii characters, so we know this will be sufficient).

773 // * We could allocate a CStringBuffer with an appropriate size to

774 // have a good chance of being able to write the string into the

775 // buffer without reallocing (say, 1.5 x length).

776 if (length > numeric_limits<unsigned>::max() / 3)

777 return CString();

778 Vector<char, 1024> bufferVector(length * 3);

779

780 char* buffer = bufferVector.data();

781

782 if (is8Bit()) {

783 const LChar* characters = this->characters8();

784

785 ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());

786 ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion

787 } else {

788 const UChar* characters = this->characters16();

789

790 if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) {

791 const UChar* charactersEnd = characters + length;

792 char* bufferEnd = buffer + bufferVector.size();

793 while (characters < charactersEnd) {

794 // Use strict conversion to detect unpaired surrogates.

795 ConversionResult result = convertUTF16ToUTF8(&characters, charac tersEnd, &buffer, bufferEnd, true);

796 ASSERT(result != targetExhausted);

797 // Conversion fails when there is an unpaired surrogate.

798 // Put replacement character (U+FFFD) instead of the unpaired su rrogate.

799 if (result != conversionOK) {

800 ASSERT((0xD800 <= characters && characters <= 0xDFFF));

801 // There should be room left, since one UChar hasn't been co nverted.

802 ASSERT((buffer + 3) <= bufferEnd);

803 putUTF8Triple(buffer, replacementCharacter);

804 ++characters;

805 }

806 }

807 } else {

808 bool strict = mode == StrictConversion;

809 ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);

810 ASSERT(result != targetExhausted); // (length * 3) should be suffici ent for any conversion

811

812 // Only produced from strict conversion.

813 if (result == sourceIllegal) {

814 ASSERT(strict);

815 return CString();

816 }

817

818 // Check for an unconverted high surrogate.

819 if (result == sourceExhausted) {

820 if (strict)

821 return CString();

822 // This should be one unpaired high surrogate. Treat it the same

823 // was as an unpaired high surrogate would have been handled in

824 // the middle of a string with non-strict conversion - which is

825 // to say, simply encode it to UTF-8.

826 ASSERT((characters + 1) == (this->characters() + length));

827 ASSERT((characters >= 0xD800) && (characters <= 0xDBFF));

828 // There should be room left, since one UChar hasn't been conver ted.

829 ASSERT((buffer + 3) <= (buffer + bufferVector.size()));

830 putUTF8Triple(buffer, *characters);

831 }

832 }

833 }

834

835 return CString(bufferVector.data(), buffer - bufferVector.data());

836 }

837

838 String String::make8BitFrom16BitSource(const UChar* source, size_t length)

839 {

840 if (!length)

841 return String();

842

843 LChar* destination;

844 String result = String::createUninitialized(length, destination);

845

846 copyLCharsFromUCharSource(destination, source, length);

847

848 return result;

849 }

850

851 String String::make16BitFrom8BitSource(const LChar* source, size_t length)

852 {

853 if (!length)

854 return String();

855

856 UChar* destination;

857 String result = String::createUninitialized(length, destination);

858

859 StringImpl::copyChars(destination, source, length);

860

861 return result;

862 }

863

864 String String::fromUTF8(const LChar* stringStart, size_t length)

865 {

866 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());

867

868 if (!stringStart)

869 return String();

870

871 if (!length)

872 return emptyString();

873

874 // We'll use a StringImpl as a buffer; if the source string only contains as cii this should be

875 // the right length, if there are any multi-byte sequences this buffer will be too large.

876 UChar* buffer;

877 String stringBuffer(StringImpl::createUninitialized(length, buffer));

878 UChar* bufferEnd = buffer + length;

879

880 // Try converting into the buffer.

881 const char* stringCurrent = reinterpret_cast<const char*>(stringStart);

882 bool isAllASCII;

883 if (convertUTF8ToUTF16(&stringCurrent, reinterpret_cast<const char *>(string Start + length), &buffer, bufferEnd, &isAllASCII) != conversionOK)

884 return String();

885

886 if (isAllASCII)

887 return String(stringStart, length);

888

889 // stringBuffer is full (the input must have been all ascii) so just return it!

890 if (buffer == bufferEnd)

891 return stringBuffer;

892

893 // stringBuffer served its purpose as a buffer, copy the contents out into a new string.

894 unsigned utf16Length = buffer - stringBuffer.characters();

895 ASSERT(utf16Length < length);

896 return String(stringBuffer.characters(), utf16Length);

897 }

898

899 String String::fromUTF8(const LChar* string)

900 {

901 if (!string)

902 return String();

903 return fromUTF8(string, strlen(reinterpret_cast<const char*>(string)));

904 }

905

906 String String::fromUTF8(const CString& s)

907 {

908 return fromUTF8(s.data());

909 }

910

911 String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size)

912 {

913 String utf8 = fromUTF8(string, size);

914 if (!utf8)

915 return String(string, size);

916 return utf8;

917 }

918

919 // String Operations

920

921 static bool isCharacterAllowedInBase(UChar c, int base)

922 {

923 if (c > 0x7F)

924 return false;

925 if (isASCIIDigit(c))

926 return c - '0' < base;

927 if (isASCIIAlpha(c)) {

928 if (base > 36)

929 base = 36;

930 return (c >= 'a' && c < 'a' + base - 10)

931 \|\| (c >= 'A' && c < 'A' + base - 10);

932 }

933 return false;

934 }

935

936 template <typename IntegralType, typename CharType>

937 static inline IntegralType toIntegralType(const CharType* data, size_t length, b ool* ok, int base)

938 {

939 static const IntegralType integralMax = numeric_limits<IntegralType>::max();

940 static const bool isSigned = numeric_limits<IntegralType>::is_signed;

941 const IntegralType maxMultiplier = integralMax / base;

942

943 IntegralType value = 0;

944 bool isOk = false;

945 bool isNegative = false;

946

947 if (!data)

948 goto bye;

949

950 // skip leading whitespace

951 while (length && isSpaceOrNewline(*data)) {

952 --length;

953 ++data;

954 }

955

956 if (isSigned && length && *data == '-') {

957 --length;

958 ++data;

959 isNegative = true;

960 } else if (length && *data == '+') {

961 --length;

962 ++data;

963 }

964

965 if (!length \|\| !isCharacterAllowedInBase(*data, base))

966 goto bye;

967

968 while (length && isCharacterAllowedInBase(*data, base)) {

969 --length;

970 IntegralType digitValue;

971 CharType c = *data;

972 if (isASCIIDigit(c))

973 digitValue = c - '0';

974 else if (c >= 'a')

975 digitValue = c - 'a' + 10;

976 else

977 digitValue = c - 'A' + 10;

978

979 if (value > maxMultiplier \|\| (value == maxMultiplier && digitValue > (in tegralMax % base) + isNegative))

980 goto bye;

981

982 value = base * value + digitValue;

983 ++data;

984 }

985

986 #if COMPILER(MSVC)

987 #pragma warning(push, 0)

988 #pragma warning(disable:4146)

989 #endif

990

991 if (isNegative)

992 value = -value;

993

994 #if COMPILER(MSVC)

995 #pragma warning(pop)

996 #endif

997

998 // skip trailing space

999 while (length && isSpaceOrNewline(*data)) {

1000 --length;

1001 ++data;

1002 }

1003

1004 if (!length)

1005 isOk = true;

1006 bye:

1007 if (ok)

1008 *ok = isOk;

1009 return isOk ? value : 0;

1010 }

1011

// Returns the length of the longest prefix of |data| that looks like an
// integer: optional leading whitespace, an optional '+' or '-', then a run of
// ASCII digits (possibly empty).
template <typename CharType>
static unsigned lengthOfCharactersAsInteger(const CharType* data, size_t length)
{
    size_t i = 0;

    // Allow leading spaces.
    while (i != length && isSpaceOrNewline(data[i]))
        ++i;

    // Allow sign.
    if (i != length && (data[i] == '+' || data[i] == '-'))
        ++i;

    // Allow digits.
    while (i != length && isASCIIDigit(data[i]))
        ++i;

    return i;
}

1035

1036 int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base)

1037 {

1038 return toIntegralType<int, LChar>(data, length, ok, base);

1039 }

1040

1041 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)

1042 {

1043 return toIntegralType<int, UChar>(data, length, ok, base);

1044 }

1045

1046 unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int base)

1047 {

1048 return toIntegralType<unsigned, LChar>(data, length, ok, base);

1049 }

1050

1051 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)

1052 {

1053 return toIntegralType<unsigned, UChar>(data, length, ok, base);

1054 }

1055

1056 int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int base)

1057 {

1058 return toIntegralType<int64_t, LChar>(data, length, ok, base);

1059 }

1060

1061 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)

1062 {

1063 return toIntegralType<int64_t, UChar>(data, length, ok, base);

1064 }

1065

1066 uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, in t base)

1067 {

1068 return toIntegralType<uint64_t, LChar>(data, length, ok, base);

1069 }

1070

1071 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, in t base)

1072 {

1073 return toIntegralType<uint64_t, UChar>(data, length, ok, base);

1074 }

1075

1076 intptr_t charactersToIntPtrStrict(const LChar* data, size_t length, bool* ok, in t base)

1077 {

1078 return toIntegralType<intptr_t, LChar>(data, length, ok, base);

1079 }

1080

1081 intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, in t base)

1082 {

1083 return toIntegralType<intptr_t, UChar>(data, length, ok, base);

1084 }

1085

1086 int charactersToInt(const LChar* data, size_t length, bool* ok)

1087 {

1088 return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(d ata, length), ok, 10);

1089 }

1090

1091 int charactersToInt(const UChar* data, size_t length, bool* ok)

1092 {

1093 return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger(data, le ngth), ok, 10);

1094 }

1095

1096 unsigned charactersToUInt(const LChar* data, size_t length, bool* ok)

1097 {

1098 return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LCh ar>(data, length), ok, 10);

1099 }

1100

1101 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)

1102 {

1103 return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UCh ar>(data, length), ok, 10);

1104 }

1105

1106 int64_t charactersToInt64(const LChar* data, size_t length, bool* ok)

1107 {

1108 return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LCha r>(data, length), ok, 10);

1109 }

1110

1111 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)

1112 {

1113 return toIntegralType<int64_t, UChar>(data, lengthOfCharactersAsInteger<UCha r>(data, length), ok, 10);

1114 }

1115

1116 uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok)

1117 {

1118 return toIntegralType<uint64_t, LChar>(data, lengthOfCharactersAsInteger<LCh ar>(data, length), ok, 10);

1119 }

1120

1121 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)

1122 {

1123 return toIntegralType<uint64_t, UChar>(data, lengthOfCharactersAsInteger<UCh ar>(data, length), ok, 10);

1124 }

1125

1126 intptr_t charactersToIntPtr(const LChar* data, size_t length, bool* ok)

1127 {

1128 return toIntegralType<intptr_t, LChar>(data, lengthOfCharactersAsInteger<LCh ar>(data, length), ok, 10);

1129 }

1130

1131 intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)

1132 {

1133 return toIntegralType<intptr_t, UChar>(data, lengthOfCharactersAsInteger<UCh ar>(data, length), ok, 10);

1134 }

1135

// Compile-time switch for toDoubleType(): decides whether characters left over
// after the parsed number still count as a successful conversion.
enum TrailingJunkPolicy {
    DisallowTrailingJunk, // success only if the whole input was consumed
    AllowTrailingJunk     // success as soon as a number was parsed
};

1137

1138 template <typename CharType, TrailingJunkPolicy policy>

1139 static inline double toDoubleType(const CharType* data, size_t length, bool* ok, size_t& parsedLength)

1140 {

1141 size_t leadingSpacesLength = 0;

1142 while (leadingSpacesLength < length && isASCIISpace(data[leadingSpacesLength ]))

1143 ++leadingSpacesLength;

1144

1145 double number = parseDouble(data + leadingSpacesLength, length - leadingSpac esLength, parsedLength);

1146 if (!parsedLength) {

1147 if (ok)

1148 *ok = false;

1149 return 0.0;

1150 }

1151

1152 parsedLength += leadingSpacesLength;

1153 if (ok)

1154 *ok = policy == AllowTrailingJunk \|\| parsedLength == length;

1155 return number;

1156 }

1157

1158 double charactersToDouble(const LChar* data, size_t length, bool* ok)

1159 {

1160 size_t parsedLength;

1161 return toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLen gth);

1162 }

1163

1164 double charactersToDouble(const UChar* data, size_t length, bool* ok)

1165 {

1166 size_t parsedLength;

1167 return toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLen gth);

1168 }

1169

1170 float charactersToFloat(const LChar* data, size_t length, bool* ok)

1171 {

1172 // FIXME: This will return ok even when the string fits into a double but no t a float.

1173 size_t parsedLength;

1174 return static_cast<float>(toDoubleType<LChar, DisallowTrailingJunk>(data, le ngth, ok, parsedLength));

1175 }

1176

1177 float charactersToFloat(const UChar* data, size_t length, bool* ok)

1178 {

1179 // FIXME: This will return ok even when the string fits into a double but no t a float.

1180 size_t parsedLength;

1181 return static_cast<float>(toDoubleType<UChar, DisallowTrailingJunk>(data, le ngth, ok, parsedLength));

1182 }

1183

1184 float charactersToFloat(const LChar* data, size_t length, size_t& parsedLength)

1185 {

1186 // FIXME: This will return ok even when the string fits into a double but no t a float.

1187 return static_cast<float>(toDoubleType<LChar, AllowTrailingJunk>(data, lengt h, 0, parsedLength));

1188 }

1189

1190 float charactersToFloat(const UChar* data, size_t length, size_t& parsedLength)

1191 {

1192 // FIXME: This will return ok even when the string fits into a double but no t a float.

1193 return static_cast<float>(toDoubleType<UChar, AllowTrailingJunk>(data, lengt h, 0, parsedLength));

1194 }

1195

1196 const String& emptyString()

1197 {

1198 DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty()));

1199 return emptyString;

1200 }

1201

1202 } // namespace WTF

1203

1204 #ifndef NDEBUG

1205 // For use in the debugger

1206 String* string(const char*);

1207 Vector<char> asciiDebug(StringImpl* impl);

1208 Vector<char> asciiDebug(String& string);

1209

1210 void String::show() const

1211 {

1212 dataLogF("%s\n", asciiDebug(impl()).data());

1213 }

1214

1215 String* string(const char* s)

1216 {

1217 // leaks memory!

1218 return new String(s);

1219 }

1220

1221 Vector<char> asciiDebug(StringImpl* impl)

1222 {

1223 if (!impl)

1224 return asciiDebug(String("[null]").impl());

1225

1226 Vector<char> buffer;

1227 for (unsigned i = 0; i < impl->length(); ++i) {

1228 UChar ch = (*impl)[i];

1229 if (isASCIIPrintable(ch)) {

1230 if (ch == '\\')

1231 buffer.append(ch);

1232 buffer.append(ch);

1233 } else {

1234 buffer.append('\\');

1235 buffer.append('u');

1236 appendUnsignedAsHexFixedSize(ch, buffer, 4);

1237 }

1238 }

1239 buffer.append('\0');

1240 return buffer;

1241 }

1242

1243 Vector<char> asciiDebug(String& string)

1244 {

1245 return asciiDebug(string.impl());

1246 }

1247

1248 #endif

OLD	NEW

« no previous file with comments | « Source/WTF/wtf/text/WTFString.h ('k') | Source/WTF/wtf/unicode/CharacterNames.h » ('j') | Source/config.h » ('J')