Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1204)

Side by Side Diff: Source/WTF/wtf/text/WTFString.cpp

Issue 14238015: Move Source/WTF/wtf to Source/wtf (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights reserved.
4 * Copyright (C) 2007-2009 Torch Mobile, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 */
21
22 #include "config.h"
23 #include "WTFString.h"
24
25 #include "IntegerToStringConversion.h"
26 #include <stdarg.h>
27 #include <wtf/ASCIICType.h>
28 #include <wtf/DataLog.h>
29 #include <wtf/HexNumber.h>
30 #include <wtf/MathExtras.h>
31 #include <wtf/text/CString.h>
32 #include <wtf/StringExtras.h>
33 #include <wtf/Vector.h>
34 #include <wtf/dtoa.h>
35 #include <wtf/unicode/CharacterNames.h>
36 #include <wtf/unicode/UTF8.h>
37 #include <wtf/unicode/Unicode.h>
38
39 using namespace std;
40
41 namespace WTF {
42
43 using namespace Unicode;
44 using namespace std;
45
46 // Construct a string with UTF-16 data.
47 String::String(const UChar* characters, unsigned length)
48 : m_impl(characters ? StringImpl::create(characters, length) : 0)
49 {
50 }
51
52 // Construct a string with UTF-16 data, from a null-terminated source.
53 String::String(const UChar* str)
54 {
55 if (!str)
56 return;
57
58 size_t len = 0;
59 while (str[len] != UChar(0))
60 ++len;
61
62 RELEASE_ASSERT(len <= numeric_limits<unsigned>::max());
63
64 m_impl = StringImpl::create(str, len);
65 }
66
67 // Construct a string with latin1 data.
68 String::String(const LChar* characters, unsigned length)
69 : m_impl(characters ? StringImpl::create(characters, length) : 0)
70 {
71 }
72
73 String::String(const char* characters, unsigned length)
74 : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(char acters), length) : 0)
75 {
76 }
77
78 // Construct a string with latin1 data, from a null-terminated source.
79 String::String(const LChar* characters)
80 : m_impl(characters ? StringImpl::create(characters) : 0)
81 {
82 }
83
84 String::String(const char* characters)
85 : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(char acters)) : 0)
86 {
87 }
88
89 String::String(ASCIILiteral characters)
90 : m_impl(StringImpl::createFromLiteral(characters))
91 {
92 }
93
94 void String::append(const String& str)
95 {
96 if (str.isEmpty())
97 return;
98
99 // FIXME: This is extremely inefficient. So much so that we might want to ta ke this
100 // out of String's API. We can make it better by optimizing the case where e xactly
101 // one String is pointing at this StringImpl, but even then it's going to re quire a
102 // call to fastMalloc every single time.
103 if (str.m_impl) {
104 if (m_impl) {
105 if (m_impl->is8Bit() && str.m_impl->is8Bit()) {
106 LChar* data;
107 RELEASE_ASSERT(str.length() <= numeric_limits<unsigned>::max() - m_impl->length());
108 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_i mpl->length() + str.length(), data);
109 memcpy(data, m_impl->characters8(), m_impl->length() * sizeof(LC har));
110 memcpy(data + m_impl->length(), str.characters8(), str.length() * sizeof(LChar));
111 m_impl = newImpl.release();
112 return;
113 }
114 UChar* data;
115 RELEASE_ASSERT(str.length() <= numeric_limits<unsigned>::max() - m_i mpl->length());
116 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl- >length() + str.length(), data);
117 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)) ;
118 memcpy(data + m_impl->length(), str.characters(), str.length() * siz eof(UChar));
119 m_impl = newImpl.release();
120 } else
121 m_impl = str.m_impl;
122 }
123 }
124
125 void String::append(LChar c)
126 {
127 // FIXME: This is extremely inefficient. So much so that we might want to ta ke this
128 // out of String's API. We can make it better by optimizing the case where e xactly
129 // one String is pointing at this StringImpl, but even then it's going to re quire a
130 // call to fastMalloc every single time.
131 if (m_impl) {
132 UChar* data;
133 RELEASE_ASSERT(m_impl->length() < numeric_limits<unsigned>::max());
134 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->len gth() + 1, data);
135 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
136 data[m_impl->length()] = c;
137 m_impl = newImpl.release();
138 } else
139 m_impl = StringImpl::create(&c, 1);
140 }
141
142 void String::append(UChar c)
143 {
144 // FIXME: This is extremely inefficient. So much so that we might want to ta ke this
145 // out of String's API. We can make it better by optimizing the case where e xactly
146 // one String is pointing at this StringImpl, but even then it's going to re quire a
147 // call to fastMalloc every single time.
148 if (m_impl) {
149 UChar* data;
150 RELEASE_ASSERT(m_impl->length() < numeric_limits<unsigned>::max());
151 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->len gth() + 1, data);
152 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
153 data[m_impl->length()] = c;
154 m_impl = newImpl.release();
155 } else
156 m_impl = StringImpl::create(&c, 1);
157 }
158
159 int codePointCompare(const String& a, const String& b)
160 {
161 return codePointCompare(a.impl(), b.impl());
162 }
163
164 void String::insert(const String& str, unsigned pos)
165 {
166 if (str.isEmpty()) {
167 if (str.isNull())
168 return;
169 if (isNull())
170 m_impl = str.impl();
171 return;
172 }
173 insert(str.characters(), str.length(), pos);
174 }
175
176 void String::append(const LChar* charactersToAppend, unsigned lengthToAppend)
177 {
178 if (!m_impl) {
179 if (!charactersToAppend)
180 return;
181 m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
182 return;
183 }
184
185 if (!lengthToAppend)
186 return;
187
188 ASSERT(charactersToAppend);
189
190 unsigned strLength = m_impl->length();
191
192 if (m_impl->is8Bit()) {
193 RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLe ngth);
194 LChar* data;
195 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
196 StringImpl::copyChars(data, m_impl->characters8(), strLength);
197 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppe nd);
198 m_impl = newImpl.release();
199 return;
200 }
201
202 RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength );
203 UChar* data;
204 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + leng thToAppend, data);
205 StringImpl::copyChars(data, m_impl->characters16(), strLength);
206 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);
207 m_impl = newImpl.release();
208 }
209
210 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
211 {
212 if (!m_impl) {
213 if (!charactersToAppend)
214 return;
215 m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
216 return;
217 }
218
219 if (!lengthToAppend)
220 return;
221
222 unsigned strLength = m_impl->length();
223
224 ASSERT(charactersToAppend);
225 RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength );
226 UChar* data;
227 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + len gthToAppend, data);
228 if (m_impl->is8Bit())
229 StringImpl::copyChars(data, characters8(), strLength);
230 else
231 StringImpl::copyChars(data, characters16(), strLength);
232 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);
233 m_impl = newImpl.release();
234 }
235
236
237 void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, un signed position)
238 {
239 if (position >= length()) {
240 append(charactersToInsert, lengthToInsert);
241 return;
242 }
243
244 ASSERT(m_impl);
245
246 if (!lengthToInsert)
247 return;
248
249 ASSERT(charactersToInsert);
250 UChar* data;
251 RELEASE_ASSERT(lengthToInsert <= numeric_limits<unsigned>::max() - length()) ;
252 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + leng thToInsert, data);
253 memcpy(data, characters(), position * sizeof(UChar));
254 memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar));
255 memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar));
256 m_impl = newImpl.release();
257 }
258
259 UChar32 String::characterStartingAt(unsigned i) const
260 {
261 if (!m_impl || i >= m_impl->length())
262 return 0;
263 return m_impl->characterStartingAt(i);
264 }
265
266 void String::truncate(unsigned position)
267 {
268 if (position >= length())
269 return;
270 UChar* data;
271 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data) ;
272 memcpy(data, characters(), position * sizeof(UChar));
273 m_impl = newImpl.release();
274 }
275
276 template <typename CharacterType>
277 inline void String::removeInternal(const CharacterType* characters, unsigned pos ition, int lengthToRemove)
278 {
279 CharacterType* data;
280 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - leng thToRemove, data);
281 memcpy(data, characters, position * sizeof(CharacterType));
282 memcpy(data + position, characters + position + lengthToRemove,
283 (length() - lengthToRemove - position) * sizeof(CharacterType));
284
285 m_impl = newImpl.release();
286 }
287
288 void String::remove(unsigned position, int lengthToRemove)
289 {
290 if (lengthToRemove <= 0)
291 return;
292 if (position >= length())
293 return;
294 if (static_cast<unsigned>(lengthToRemove) > length() - position)
295 lengthToRemove = length() - position;
296
297 if (is8Bit()) {
298 removeInternal(characters8(), position, lengthToRemove);
299
300 return;
301 }
302
303 removeInternal(characters16(), position, lengthToRemove);
304 }
305
306 String String::substring(unsigned pos, unsigned len) const
307 {
308 if (!m_impl)
309 return String();
310 return m_impl->substring(pos, len);
311 }
312
313 String String::substringSharingImpl(unsigned offset, unsigned length) const
314 {
315 // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UC har).
316
317 unsigned stringLength = this->length();
318 offset = min(offset, stringLength);
319 length = min(length, stringLength - offset);
320
321 if (!offset && length == stringLength)
322 return *this;
323 return String(StringImpl::create(m_impl, offset, length));
324 }
325
326 String String::lower() const
327 {
328 if (!m_impl)
329 return String();
330 return m_impl->lower();
331 }
332
333 String String::upper() const
334 {
335 if (!m_impl)
336 return String();
337 return m_impl->upper();
338 }
339
340 String String::stripWhiteSpace() const
341 {
342 if (!m_impl)
343 return String();
344 return m_impl->stripWhiteSpace();
345 }
346
347 String String::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const
348 {
349 if (!m_impl)
350 return String();
351 return m_impl->stripWhiteSpace(isWhiteSpace);
352 }
353
354 String String::simplifyWhiteSpace() const
355 {
356 if (!m_impl)
357 return String();
358 return m_impl->simplifyWhiteSpace();
359 }
360
361 String String::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const
362 {
363 if (!m_impl)
364 return String();
365 return m_impl->simplifyWhiteSpace(isWhiteSpace);
366 }
367
368 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const
369 {
370 if (!m_impl)
371 return String();
372 return m_impl->removeCharacters(findMatch);
373 }
374
375 String String::foldCase() const
376 {
377 if (!m_impl)
378 return String();
379 return m_impl->foldCase();
380 }
381
382 bool String::percentage(int& result) const
383 {
384 if (!m_impl || !m_impl->length())
385 return false;
386
387 if ((*m_impl)[m_impl->length() - 1] != '%')
388 return false;
389
390 result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1);
391 return true;
392 }
393
394 const UChar* String::charactersWithNullTermination()
395 {
396 if (!m_impl)
397 return 0;
398 if (m_impl->hasTerminatingNullCharacter())
399 return m_impl->characters();
400 m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl);
401 return m_impl->characters();
402 }
403
404 String String::format(const char *format, ...)
405 {
406 #if OS(WINCE)
407 va_list args;
408 va_start(args, format);
409
410 Vector<char, 256> buffer;
411
412 int bufferSize = 256;
413 buffer.resize(bufferSize);
414 for (;;) {
415 int written = vsnprintf(buffer.data(), bufferSize, format, args);
416 va_end(args);
417
418 if (written == 0)
419 return String("");
420 if (written > 0)
421 return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data ()), written);
422
423 bufferSize <<= 1;
424 buffer.resize(bufferSize);
425 va_start(args, format);
426 }
427
428 #else
429 va_list args;
430 va_start(args, format);
431
432 Vector<char, 256> buffer;
433
434 // Do the format once to get the length.
435 #if COMPILER(MSVC)
436 int result = _vscprintf(format, args);
437 #else
438 char ch;
439 int result = vsnprintf(&ch, 1, format, args);
440 // We need to call va_end() and then va_start() again here, as the
441 // contents of args is undefined after the call to vsnprintf
442 // according to http://man.cx/snprintf(3)
443 //
444 // Not calling va_end/va_start here happens to work on lots of
445 // systems, but fails e.g. on 64bit Linux.
446 va_end(args);
447 va_start(args, format);
448 #endif
449
450 if (result == 0)
451 return String("");
452 if (result < 0)
453 return String();
454 unsigned len = result;
455 buffer.grow(len + 1);
456
457 // Now do the formatting again, guaranteed to fit.
458 vsnprintf(buffer.data(), buffer.size(), format, args);
459
460 va_end(args);
461
462 return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len );
463 #endif
464 }
465
466 String String::number(int number)
467 {
468 return numberToStringSigned<String>(number);
469 }
470
471 String String::number(unsigned int number)
472 {
473 return numberToStringUnsigned<String>(number);
474 }
475
476 String String::number(long number)
477 {
478 return numberToStringSigned<String>(number);
479 }
480
481 String String::number(unsigned long number)
482 {
483 return numberToStringUnsigned<String>(number);
484 }
485
486 String String::number(long long number)
487 {
488 return numberToStringSigned<String>(number);
489 }
490
491 String String::number(unsigned long long number)
492 {
493 return numberToStringUnsigned<String>(number);
494 }
495
496 String String::number(double number, unsigned precision, TrailingZerosTruncating Policy trailingZerosTruncatingPolicy)
497 {
498 NumberToStringBuffer buffer;
499 return String(numberToFixedPrecisionString(number, precision, buffer, traili ngZerosTruncatingPolicy == TruncateTrailingZeros));
500 }
501
502 String String::numberToStringECMAScript(double number)
503 {
504 NumberToStringBuffer buffer;
505 return String(numberToString(number, buffer));
506 }
507
508 String String::numberToStringFixedWidth(double number, unsigned decimalPlaces)
509 {
510 NumberToStringBuffer buffer;
511 return String(numberToFixedWidthString(number, decimalPlaces, buffer));
512 }
513
514 int String::toIntStrict(bool* ok, int base) const
515 {
516 if (!m_impl) {
517 if (ok)
518 *ok = false;
519 return 0;
520 }
521 return m_impl->toIntStrict(ok, base);
522 }
523
524 unsigned String::toUIntStrict(bool* ok, int base) const
525 {
526 if (!m_impl) {
527 if (ok)
528 *ok = false;
529 return 0;
530 }
531 return m_impl->toUIntStrict(ok, base);
532 }
533
534 int64_t String::toInt64Strict(bool* ok, int base) const
535 {
536 if (!m_impl) {
537 if (ok)
538 *ok = false;
539 return 0;
540 }
541 return m_impl->toInt64Strict(ok, base);
542 }
543
544 uint64_t String::toUInt64Strict(bool* ok, int base) const
545 {
546 if (!m_impl) {
547 if (ok)
548 *ok = false;
549 return 0;
550 }
551 return m_impl->toUInt64Strict(ok, base);
552 }
553
554 intptr_t String::toIntPtrStrict(bool* ok, int base) const
555 {
556 if (!m_impl) {
557 if (ok)
558 *ok = false;
559 return 0;
560 }
561 return m_impl->toIntPtrStrict(ok, base);
562 }
563
564 int String::toInt(bool* ok) const
565 {
566 if (!m_impl) {
567 if (ok)
568 *ok = false;
569 return 0;
570 }
571 return m_impl->toInt(ok);
572 }
573
574 unsigned String::toUInt(bool* ok) const
575 {
576 if (!m_impl) {
577 if (ok)
578 *ok = false;
579 return 0;
580 }
581 return m_impl->toUInt(ok);
582 }
583
584 int64_t String::toInt64(bool* ok) const
585 {
586 if (!m_impl) {
587 if (ok)
588 *ok = false;
589 return 0;
590 }
591 return m_impl->toInt64(ok);
592 }
593
594 uint64_t String::toUInt64(bool* ok) const
595 {
596 if (!m_impl) {
597 if (ok)
598 *ok = false;
599 return 0;
600 }
601 return m_impl->toUInt64(ok);
602 }
603
604 intptr_t String::toIntPtr(bool* ok) const
605 {
606 if (!m_impl) {
607 if (ok)
608 *ok = false;
609 return 0;
610 }
611 return m_impl->toIntPtr(ok);
612 }
613
614 double String::toDouble(bool* ok) const
615 {
616 if (!m_impl) {
617 if (ok)
618 *ok = false;
619 return 0.0;
620 }
621 return m_impl->toDouble(ok);
622 }
623
624 float String::toFloat(bool* ok) const
625 {
626 if (!m_impl) {
627 if (ok)
628 *ok = false;
629 return 0.0f;
630 }
631 return m_impl->toFloat(ok);
632 }
633
634 String String::isolatedCopy() const
635 {
636 if (!m_impl)
637 return String();
638 return m_impl->isolatedCopy();
639 }
640
641 bool String::isSafeToSendToAnotherThread() const
642 {
643 if (!impl())
644 return true;
645 // AtomicStrings are not safe to send between threads as ~StringImpl()
646 // will try to remove them from the wrong AtomicStringTable.
647 if (impl()->isAtomic())
648 return false;
649 if (impl()->hasOneRef())
650 return true;
651 if (isEmpty())
652 return true;
653 return false;
654 }
655
656 void String::split(const String& separator, bool allowEmptyEntries, Vector<Strin g>& result) const
657 {
658 result.clear();
659
660 unsigned startPos = 0;
661 size_t endPos;
662 while ((endPos = find(separator, startPos)) != notFound) {
663 if (allowEmptyEntries || startPos != endPos)
664 result.append(substring(startPos, endPos - startPos));
665 startPos = endPos + separator.length();
666 }
667 if (allowEmptyEntries || startPos != length())
668 result.append(substring(startPos));
669 }
670
671 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& resu lt) const
672 {
673 result.clear();
674
675 unsigned startPos = 0;
676 size_t endPos;
677 while ((endPos = find(separator, startPos)) != notFound) {
678 if (allowEmptyEntries || startPos != endPos)
679 result.append(substring(startPos, endPos - startPos));
680 startPos = endPos + 1;
681 }
682 if (allowEmptyEntries || startPos != length())
683 result.append(substring(startPos));
684 }
685
686 CString String::ascii() const
687 {
688 // Printable ASCII characters 32..127 and the null character are
689 // preserved, characters outside of this range are converted to '?'.
690
691 unsigned length = this->length();
692 if (!length) {
693 char* characterBuffer;
694 return CString::newUninitialized(length, characterBuffer);
695 }
696
697 if (this->is8Bit()) {
698 const LChar* characters = this->characters8();
699
700 char* characterBuffer;
701 CString result = CString::newUninitialized(length, characterBuffer);
702
703 for (unsigned i = 0; i < length; ++i) {
704 LChar ch = characters[i];
705 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
706 }
707
708 return result;
709 }
710
711 const UChar* characters = this->characters16();
712
713 char* characterBuffer;
714 CString result = CString::newUninitialized(length, characterBuffer);
715
716 for (unsigned i = 0; i < length; ++i) {
717 UChar ch = characters[i];
718 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
719 }
720
721 return result;
722 }
723
724 CString String::latin1() const
725 {
726 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
727 // preserved, characters outside of this range are converted to '?'.
728
729 unsigned length = this->length();
730
731 if (!length)
732 return CString("", 0);
733
734 if (is8Bit())
735 return CString(reinterpret_cast<const char*>(this->characters8()), lengt h);
736
737 const UChar* characters = this->characters16();
738
739 char* characterBuffer;
740 CString result = CString::newUninitialized(length, characterBuffer);
741
742 for (unsigned i = 0; i < length; ++i) {
743 UChar ch = characters[i];
744 characterBuffer[i] = ch > 0xff ? '?' : ch;
745 }
746
747 return result;
748 }
749
750 // Helper to write a three-byte UTF-8 code point to the buffer, caller must chec k room is available.
751 static inline void putUTF8Triple(char*& buffer, UChar ch)
752 {
753 ASSERT(ch >= 0x0800);
754 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
755 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
756 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
757 }
758
759 CString String::utf8(ConversionMode mode) const
760 {
761 unsigned length = this->length();
762
763 if (!length)
764 return CString("", 0);
765
766 // Allocate a buffer big enough to hold all the characters
767 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
768 // Optimization ideas, if we find this function is hot:
769 // * We could speculatively create a CStringBuffer to contain 'length'
770 // characters, and resize if necessary (i.e. if the buffer contains
771 // non-ascii characters). (Alternatively, scan the buffer first for
772 // ascii characters, so we know this will be sufficient).
773 // * We could allocate a CStringBuffer with an appropriate size to
774 // have a good chance of being able to write the string into the
775 // buffer without reallocing (say, 1.5 x length).
776 if (length > numeric_limits<unsigned>::max() / 3)
777 return CString();
778 Vector<char, 1024> bufferVector(length * 3);
779
780 char* buffer = bufferVector.data();
781
782 if (is8Bit()) {
783 const LChar* characters = this->characters8();
784
785 ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());
786 ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion
787 } else {
788 const UChar* characters = this->characters16();
789
790 if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) {
791 const UChar* charactersEnd = characters + length;
792 char* bufferEnd = buffer + bufferVector.size();
793 while (characters < charactersEnd) {
794 // Use strict conversion to detect unpaired surrogates.
795 ConversionResult result = convertUTF16ToUTF8(&characters, charac tersEnd, &buffer, bufferEnd, true);
796 ASSERT(result != targetExhausted);
797 // Conversion fails when there is an unpaired surrogate.
798 // Put replacement character (U+FFFD) instead of the unpaired su rrogate.
799 if (result != conversionOK) {
800 ASSERT((0xD800 <= *characters && *characters <= 0xDFFF));
801 // There should be room left, since one UChar hasn't been co nverted.
802 ASSERT((buffer + 3) <= bufferEnd);
803 putUTF8Triple(buffer, replacementCharacter);
804 ++characters;
805 }
806 }
807 } else {
808 bool strict = mode == StrictConversion;
809 ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
810 ASSERT(result != targetExhausted); // (length * 3) should be suffici ent for any conversion
811
812 // Only produced from strict conversion.
813 if (result == sourceIllegal) {
814 ASSERT(strict);
815 return CString();
816 }
817
818 // Check for an unconverted high surrogate.
819 if (result == sourceExhausted) {
820 if (strict)
821 return CString();
822 // This should be one unpaired high surrogate. Treat it the same
823 // was as an unpaired high surrogate would have been handled in
824 // the middle of a string with non-strict conversion - which is
825 // to say, simply encode it to UTF-8.
826 ASSERT((characters + 1) == (this->characters() + length));
827 ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
828 // There should be room left, since one UChar hasn't been conver ted.
829 ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
830 putUTF8Triple(buffer, *characters);
831 }
832 }
833 }
834
835 return CString(bufferVector.data(), buffer - bufferVector.data());
836 }
837
838 String String::make8BitFrom16BitSource(const UChar* source, size_t length)
839 {
840 if (!length)
841 return String();
842
843 LChar* destination;
844 String result = String::createUninitialized(length, destination);
845
846 copyLCharsFromUCharSource(destination, source, length);
847
848 return result;
849 }
850
851 String String::make16BitFrom8BitSource(const LChar* source, size_t length)
852 {
853 if (!length)
854 return String();
855
856 UChar* destination;
857 String result = String::createUninitialized(length, destination);
858
859 StringImpl::copyChars(destination, source, length);
860
861 return result;
862 }
863
864 String String::fromUTF8(const LChar* stringStart, size_t length)
865 {
866 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
867
868 if (!stringStart)
869 return String();
870
871 if (!length)
872 return emptyString();
873
874 // We'll use a StringImpl as a buffer; if the source string only contains as cii this should be
875 // the right length, if there are any multi-byte sequences this buffer will be too large.
876 UChar* buffer;
877 String stringBuffer(StringImpl::createUninitialized(length, buffer));
878 UChar* bufferEnd = buffer + length;
879
880 // Try converting into the buffer.
881 const char* stringCurrent = reinterpret_cast<const char*>(stringStart);
882 bool isAllASCII;
883 if (convertUTF8ToUTF16(&stringCurrent, reinterpret_cast<const char *>(string Start + length), &buffer, bufferEnd, &isAllASCII) != conversionOK)
884 return String();
885
886 if (isAllASCII)
887 return String(stringStart, length);
888
889 // stringBuffer is full (the input must have been all ascii) so just return it!
890 if (buffer == bufferEnd)
891 return stringBuffer;
892
893 // stringBuffer served its purpose as a buffer, copy the contents out into a new string.
894 unsigned utf16Length = buffer - stringBuffer.characters();
895 ASSERT(utf16Length < length);
896 return String(stringBuffer.characters(), utf16Length);
897 }
898
899 String String::fromUTF8(const LChar* string)
900 {
901 if (!string)
902 return String();
903 return fromUTF8(string, strlen(reinterpret_cast<const char*>(string)));
904 }
905
906 String String::fromUTF8(const CString& s)
907 {
908 return fromUTF8(s.data());
909 }
910
911 String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size)
912 {
913 String utf8 = fromUTF8(string, size);
914 if (!utf8)
915 return String(string, size);
916 return utf8;
917 }
918
919 // String Operations
920
921 static bool isCharacterAllowedInBase(UChar c, int base)
922 {
923 if (c > 0x7F)
924 return false;
925 if (isASCIIDigit(c))
926 return c - '0' < base;
927 if (isASCIIAlpha(c)) {
928 if (base > 36)
929 base = 36;
930 return (c >= 'a' && c < 'a' + base - 10)
931 || (c >= 'A' && c < 'A' + base - 10);
932 }
933 return false;
934 }
935
936 template <typename IntegralType, typename CharType>
937 static inline IntegralType toIntegralType(const CharType* data, size_t length, b ool* ok, int base)
938 {
939 static const IntegralType integralMax = numeric_limits<IntegralType>::max();
940 static const bool isSigned = numeric_limits<IntegralType>::is_signed;
941 const IntegralType maxMultiplier = integralMax / base;
942
943 IntegralType value = 0;
944 bool isOk = false;
945 bool isNegative = false;
946
947 if (!data)
948 goto bye;
949
950 // skip leading whitespace
951 while (length && isSpaceOrNewline(*data)) {
952 --length;
953 ++data;
954 }
955
956 if (isSigned && length && *data == '-') {
957 --length;
958 ++data;
959 isNegative = true;
960 } else if (length && *data == '+') {
961 --length;
962 ++data;
963 }
964
965 if (!length || !isCharacterAllowedInBase(*data, base))
966 goto bye;
967
968 while (length && isCharacterAllowedInBase(*data, base)) {
969 --length;
970 IntegralType digitValue;
971 CharType c = *data;
972 if (isASCIIDigit(c))
973 digitValue = c - '0';
974 else if (c >= 'a')
975 digitValue = c - 'a' + 10;
976 else
977 digitValue = c - 'A' + 10;
978
979 if (value > maxMultiplier || (value == maxMultiplier && digitValue > (in tegralMax % base) + isNegative))
980 goto bye;
981
982 value = base * value + digitValue;
983 ++data;
984 }
985
986 #if COMPILER(MSVC)
987 #pragma warning(push, 0)
988 #pragma warning(disable:4146)
989 #endif
990
991 if (isNegative)
992 value = -value;
993
994 #if COMPILER(MSVC)
995 #pragma warning(pop)
996 #endif
997
998 // skip trailing space
999 while (length && isSpaceOrNewline(*data)) {
1000 --length;
1001 ++data;
1002 }
1003
1004 if (!length)
1005 isOk = true;
1006 bye:
1007 if (ok)
1008 *ok = isOk;
1009 return isOk ? value : 0;
1010 }
1011
// Returns the length of the longest prefix of |data| that has the shape
// "<whitespace>* <sign>? <decimal digit>*". The prefix may contain no digits
// at all.
template <typename CharType>
static unsigned lengthOfCharactersAsInteger(const CharType* data, size_t length)
{
    size_t cursor = 0;

    // Leading whitespace.
    while (cursor != length && isSpaceOrNewline(data[cursor]))
        ++cursor;

    // Optional sign.
    if (cursor != length && (data[cursor] == '+' || data[cursor] == '-'))
        ++cursor;

    // Decimal digits.
    while (cursor != length && isASCIIDigit(data[cursor]))
        ++cursor;

    return cursor;
}
1035
1036 int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base)
1037 {
1038 return toIntegralType<int, LChar>(data, length, ok, base);
1039 }
1040
1041 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
1042 {
1043 return toIntegralType<int, UChar>(data, length, ok, base);
1044 }
1045
1046 unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int base)
1047 {
1048 return toIntegralType<unsigned, LChar>(data, length, ok, base);
1049 }
1050
1051 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
1052 {
1053 return toIntegralType<unsigned, UChar>(data, length, ok, base);
1054 }
1055
1056 int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int base)
1057 {
1058 return toIntegralType<int64_t, LChar>(data, length, ok, base);
1059 }
1060
1061 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
1062 {
1063 return toIntegralType<int64_t, UChar>(data, length, ok, base);
1064 }
1065
1066 uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, in t base)
1067 {
1068 return toIntegralType<uint64_t, LChar>(data, length, ok, base);
1069 }
1070
1071 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, in t base)
1072 {
1073 return toIntegralType<uint64_t, UChar>(data, length, ok, base);
1074 }
1075
1076 intptr_t charactersToIntPtrStrict(const LChar* data, size_t length, bool* ok, in t base)
1077 {
1078 return toIntegralType<intptr_t, LChar>(data, length, ok, base);
1079 }
1080
1081 intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, in t base)
1082 {
1083 return toIntegralType<intptr_t, UChar>(data, length, ok, base);
1084 }
1085
1086 int charactersToInt(const LChar* data, size_t length, bool* ok)
1087 {
1088 return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(d ata, length), ok, 10);
1089 }
1090
1091 int charactersToInt(const UChar* data, size_t length, bool* ok)
1092 {
1093 return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger(data, le ngth), ok, 10);
1094 }
1095
1096 unsigned charactersToUInt(const LChar* data, size_t length, bool* ok)
1097 {
1098 return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LCh ar>(data, length), ok, 10);
1099 }
1100
1101 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
1102 {
1103 return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UCh ar>(data, length), ok, 10);
1104 }
1105
1106 int64_t charactersToInt64(const LChar* data, size_t length, bool* ok)
1107 {
1108 return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LCha r>(data, length), ok, 10);
1109 }
1110
1111 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
1112 {
1113 return toIntegralType<int64_t, UChar>(data, lengthOfCharactersAsInteger<UCha r>(data, length), ok, 10);
1114 }
1115
1116 uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok)
1117 {
1118 return toIntegralType<uint64_t, LChar>(data, lengthOfCharactersAsInteger<LCh ar>(data, length), ok, 10);
1119 }
1120
1121 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
1122 {
1123 return toIntegralType<uint64_t, UChar>(data, lengthOfCharactersAsInteger<UCh ar>(data, length), ok, 10);
1124 }
1125
1126 intptr_t charactersToIntPtr(const LChar* data, size_t length, bool* ok)
1127 {
1128 return toIntegralType<intptr_t, LChar>(data, lengthOfCharactersAsInteger<LCh ar>(data, length), ok, 10);
1129 }
1130
1131 intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
1132 {
1133 return toIntegralType<intptr_t, UChar>(data, lengthOfCharactersAsInteger<UCh ar>(data, length), ok, 10);
1134 }
1135
// Compile-time switch for toDoubleType(): selects whether characters left over
// after the parsed number still count as a successful conversion.
enum TrailingJunkPolicy {
    DisallowTrailingJunk, // |ok| is true only if the whole input was consumed.
    AllowTrailingJunk // |ok| is true whenever a number was parsed.
};
1137
1138 template <typename CharType, TrailingJunkPolicy policy>
1139 static inline double toDoubleType(const CharType* data, size_t length, bool* ok, size_t& parsedLength)
1140 {
1141 size_t leadingSpacesLength = 0;
1142 while (leadingSpacesLength < length && isASCIISpace(data[leadingSpacesLength ]))
1143 ++leadingSpacesLength;
1144
1145 double number = parseDouble(data + leadingSpacesLength, length - leadingSpac esLength, parsedLength);
1146 if (!parsedLength) {
1147 if (ok)
1148 *ok = false;
1149 return 0.0;
1150 }
1151
1152 parsedLength += leadingSpacesLength;
1153 if (ok)
1154 *ok = policy == AllowTrailingJunk || parsedLength == length;
1155 return number;
1156 }
1157
1158 double charactersToDouble(const LChar* data, size_t length, bool* ok)
1159 {
1160 size_t parsedLength;
1161 return toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLen gth);
1162 }
1163
1164 double charactersToDouble(const UChar* data, size_t length, bool* ok)
1165 {
1166 size_t parsedLength;
1167 return toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLen gth);
1168 }
1169
1170 float charactersToFloat(const LChar* data, size_t length, bool* ok)
1171 {
1172 // FIXME: This will return ok even when the string fits into a double but no t a float.
1173 size_t parsedLength;
1174 return static_cast<float>(toDoubleType<LChar, DisallowTrailingJunk>(data, le ngth, ok, parsedLength));
1175 }
1176
1177 float charactersToFloat(const UChar* data, size_t length, bool* ok)
1178 {
1179 // FIXME: This will return ok even when the string fits into a double but no t a float.
1180 size_t parsedLength;
1181 return static_cast<float>(toDoubleType<UChar, DisallowTrailingJunk>(data, le ngth, ok, parsedLength));
1182 }
1183
1184 float charactersToFloat(const LChar* data, size_t length, size_t& parsedLength)
1185 {
1186 // FIXME: This will return ok even when the string fits into a double but no t a float.
1187 return static_cast<float>(toDoubleType<LChar, AllowTrailingJunk>(data, lengt h, 0, parsedLength));
1188 }
1189
1190 float charactersToFloat(const UChar* data, size_t length, size_t& parsedLength)
1191 {
1192 // FIXME: This will return ok even when the string fits into a double but no t a float.
1193 return static_cast<float>(toDoubleType<UChar, AllowTrailingJunk>(data, lengt h, 0, parsedLength));
1194 }
1195
1196 const String& emptyString()
1197 {
1198 DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty()));
1199 return emptyString;
1200 }
1201
1202 } // namespace WTF
1203
1204 #ifndef NDEBUG
1205 // For use in the debugger
1206 String* string(const char*);
1207 Vector<char> asciiDebug(StringImpl* impl);
1208 Vector<char> asciiDebug(String& string);
1209
1210 void String::show() const
1211 {
1212 dataLogF("%s\n", asciiDebug(impl()).data());
1213 }
1214
1215 String* string(const char* s)
1216 {
1217 // leaks memory!
1218 return new String(s);
1219 }
1220
1221 Vector<char> asciiDebug(StringImpl* impl)
1222 {
1223 if (!impl)
1224 return asciiDebug(String("[null]").impl());
1225
1226 Vector<char> buffer;
1227 for (unsigned i = 0; i < impl->length(); ++i) {
1228 UChar ch = (*impl)[i];
1229 if (isASCIIPrintable(ch)) {
1230 if (ch == '\\')
1231 buffer.append(ch);
1232 buffer.append(ch);
1233 } else {
1234 buffer.append('\\');
1235 buffer.append('u');
1236 appendUnsignedAsHexFixedSize(ch, buffer, 4);
1237 }
1238 }
1239 buffer.append('\0');
1240 return buffer;
1241 }
1242
1243 Vector<char> asciiDebug(String& string)
1244 {
1245 return asciiDebug(string.impl());
1246 }
1247
1248 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698