OLD | NEW |
| (Empty) |
1 /* | |
2 * (C) 1999 Lars Knoll (knoll@kde.org) | |
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights
reserved. | |
4 * Copyright (C) 2007-2009 Torch Mobile, Inc. | |
5 * | |
6 * This library is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Library General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2 of the License, or (at your option) any later version. | |
10 * | |
11 * This library is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Library General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Library General Public License | |
17 * along with this library; see the file COPYING.LIB. If not, write to | |
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
19 * Boston, MA 02110-1301, USA. | |
20 */ | |
21 | |
22 #include "config.h" | |
23 #include "WTFString.h" | |
24 | |
25 #include "IntegerToStringConversion.h" | |
26 #include <stdarg.h> | |
27 #include <wtf/ASCIICType.h> | |
28 #include <wtf/DataLog.h> | |
29 #include <wtf/HexNumber.h> | |
30 #include <wtf/MathExtras.h> | |
31 #include <wtf/text/CString.h> | |
32 #include <wtf/StringExtras.h> | |
33 #include <wtf/Vector.h> | |
34 #include <wtf/dtoa.h> | |
35 #include <wtf/unicode/CharacterNames.h> | |
36 #include <wtf/unicode/UTF8.h> | |
37 #include <wtf/unicode/Unicode.h> | |
38 | |
39 using namespace std; | |
40 | |
41 namespace WTF { | |
42 | |
43 using namespace Unicode; | |
44 using namespace std; | |
45 | |
46 // Construct a string with UTF-16 data. | |
47 String::String(const UChar* characters, unsigned length) | |
48 : m_impl(characters ? StringImpl::create(characters, length) : 0) | |
49 { | |
50 } | |
51 | |
52 // Construct a string with UTF-16 data, from a null-terminated source. | |
53 String::String(const UChar* str) | |
54 { | |
55 if (!str) | |
56 return; | |
57 | |
58 size_t len = 0; | |
59 while (str[len] != UChar(0)) | |
60 ++len; | |
61 | |
62 RELEASE_ASSERT(len <= numeric_limits<unsigned>::max()); | |
63 | |
64 m_impl = StringImpl::create(str, len); | |
65 } | |
66 | |
67 // Construct a string with latin1 data. | |
68 String::String(const LChar* characters, unsigned length) | |
69 : m_impl(characters ? StringImpl::create(characters, length) : 0) | |
70 { | |
71 } | |
72 | |
73 String::String(const char* characters, unsigned length) | |
74 : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(char
acters), length) : 0) | |
75 { | |
76 } | |
77 | |
78 // Construct a string with latin1 data, from a null-terminated source. | |
79 String::String(const LChar* characters) | |
80 : m_impl(characters ? StringImpl::create(characters) : 0) | |
81 { | |
82 } | |
83 | |
84 String::String(const char* characters) | |
85 : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(char
acters)) : 0) | |
86 { | |
87 } | |
88 | |
89 String::String(ASCIILiteral characters) | |
90 : m_impl(StringImpl::createFromLiteral(characters)) | |
91 { | |
92 } | |
93 | |
94 void String::append(const String& str) | |
95 { | |
96 if (str.isEmpty()) | |
97 return; | |
98 | |
99 // FIXME: This is extremely inefficient. So much so that we might want to ta
ke this | |
100 // out of String's API. We can make it better by optimizing the case where e
xactly | |
101 // one String is pointing at this StringImpl, but even then it's going to re
quire a | |
102 // call to fastMalloc every single time. | |
103 if (str.m_impl) { | |
104 if (m_impl) { | |
105 if (m_impl->is8Bit() && str.m_impl->is8Bit()) { | |
106 LChar* data; | |
107 RELEASE_ASSERT(str.length() <= numeric_limits<unsigned>::max() -
m_impl->length()); | |
108 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_i
mpl->length() + str.length(), data); | |
109 memcpy(data, m_impl->characters8(), m_impl->length() * sizeof(LC
har)); | |
110 memcpy(data + m_impl->length(), str.characters8(), str.length()
* sizeof(LChar)); | |
111 m_impl = newImpl.release(); | |
112 return; | |
113 } | |
114 UChar* data; | |
115 RELEASE_ASSERT(str.length() <= numeric_limits<unsigned>::max() - m_i
mpl->length()); | |
116 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl-
>length() + str.length(), data); | |
117 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar))
; | |
118 memcpy(data + m_impl->length(), str.characters(), str.length() * siz
eof(UChar)); | |
119 m_impl = newImpl.release(); | |
120 } else | |
121 m_impl = str.m_impl; | |
122 } | |
123 } | |
124 | |
125 void String::append(LChar c) | |
126 { | |
127 // FIXME: This is extremely inefficient. So much so that we might want to ta
ke this | |
128 // out of String's API. We can make it better by optimizing the case where e
xactly | |
129 // one String is pointing at this StringImpl, but even then it's going to re
quire a | |
130 // call to fastMalloc every single time. | |
131 if (m_impl) { | |
132 UChar* data; | |
133 RELEASE_ASSERT(m_impl->length() < numeric_limits<unsigned>::max()); | |
134 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->len
gth() + 1, data); | |
135 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); | |
136 data[m_impl->length()] = c; | |
137 m_impl = newImpl.release(); | |
138 } else | |
139 m_impl = StringImpl::create(&c, 1); | |
140 } | |
141 | |
142 void String::append(UChar c) | |
143 { | |
144 // FIXME: This is extremely inefficient. So much so that we might want to ta
ke this | |
145 // out of String's API. We can make it better by optimizing the case where e
xactly | |
146 // one String is pointing at this StringImpl, but even then it's going to re
quire a | |
147 // call to fastMalloc every single time. | |
148 if (m_impl) { | |
149 UChar* data; | |
150 RELEASE_ASSERT(m_impl->length() < numeric_limits<unsigned>::max()); | |
151 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->len
gth() + 1, data); | |
152 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); | |
153 data[m_impl->length()] = c; | |
154 m_impl = newImpl.release(); | |
155 } else | |
156 m_impl = StringImpl::create(&c, 1); | |
157 } | |
158 | |
159 int codePointCompare(const String& a, const String& b) | |
160 { | |
161 return codePointCompare(a.impl(), b.impl()); | |
162 } | |
163 | |
164 void String::insert(const String& str, unsigned pos) | |
165 { | |
166 if (str.isEmpty()) { | |
167 if (str.isNull()) | |
168 return; | |
169 if (isNull()) | |
170 m_impl = str.impl(); | |
171 return; | |
172 } | |
173 insert(str.characters(), str.length(), pos); | |
174 } | |
175 | |
176 void String::append(const LChar* charactersToAppend, unsigned lengthToAppend) | |
177 { | |
178 if (!m_impl) { | |
179 if (!charactersToAppend) | |
180 return; | |
181 m_impl = StringImpl::create(charactersToAppend, lengthToAppend); | |
182 return; | |
183 } | |
184 | |
185 if (!lengthToAppend) | |
186 return; | |
187 | |
188 ASSERT(charactersToAppend); | |
189 | |
190 unsigned strLength = m_impl->length(); | |
191 | |
192 if (m_impl->is8Bit()) { | |
193 RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLe
ngth); | |
194 LChar* data; | |
195 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength +
lengthToAppend, data); | |
196 StringImpl::copyChars(data, m_impl->characters8(), strLength); | |
197 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppe
nd); | |
198 m_impl = newImpl.release(); | |
199 return; | |
200 } | |
201 | |
202 RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength
); | |
203 UChar* data; | |
204 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + leng
thToAppend, data); | |
205 StringImpl::copyChars(data, m_impl->characters16(), strLength); | |
206 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); | |
207 m_impl = newImpl.release(); | |
208 } | |
209 | |
210 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) | |
211 { | |
212 if (!m_impl) { | |
213 if (!charactersToAppend) | |
214 return; | |
215 m_impl = StringImpl::create(charactersToAppend, lengthToAppend); | |
216 return; | |
217 } | |
218 | |
219 if (!lengthToAppend) | |
220 return; | |
221 | |
222 unsigned strLength = m_impl->length(); | |
223 | |
224 ASSERT(charactersToAppend); | |
225 RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength
); | |
226 UChar* data; | |
227 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + len
gthToAppend, data); | |
228 if (m_impl->is8Bit()) | |
229 StringImpl::copyChars(data, characters8(), strLength); | |
230 else | |
231 StringImpl::copyChars(data, characters16(), strLength); | |
232 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); | |
233 m_impl = newImpl.release(); | |
234 } | |
235 | |
236 | |
237 void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, un
signed position) | |
238 { | |
239 if (position >= length()) { | |
240 append(charactersToInsert, lengthToInsert); | |
241 return; | |
242 } | |
243 | |
244 ASSERT(m_impl); | |
245 | |
246 if (!lengthToInsert) | |
247 return; | |
248 | |
249 ASSERT(charactersToInsert); | |
250 UChar* data; | |
251 RELEASE_ASSERT(lengthToInsert <= numeric_limits<unsigned>::max() - length())
; | |
252 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + leng
thToInsert, data); | |
253 memcpy(data, characters(), position * sizeof(UChar)); | |
254 memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar)); | |
255 memcpy(data + position + lengthToInsert, characters() + position, (length()
- position) * sizeof(UChar)); | |
256 m_impl = newImpl.release(); | |
257 } | |
258 | |
259 UChar32 String::characterStartingAt(unsigned i) const | |
260 { | |
261 if (!m_impl || i >= m_impl->length()) | |
262 return 0; | |
263 return m_impl->characterStartingAt(i); | |
264 } | |
265 | |
266 void String::truncate(unsigned position) | |
267 { | |
268 if (position >= length()) | |
269 return; | |
270 UChar* data; | |
271 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data)
; | |
272 memcpy(data, characters(), position * sizeof(UChar)); | |
273 m_impl = newImpl.release(); | |
274 } | |
275 | |
276 template <typename CharacterType> | |
277 inline void String::removeInternal(const CharacterType* characters, unsigned pos
ition, int lengthToRemove) | |
278 { | |
279 CharacterType* data; | |
280 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - leng
thToRemove, data); | |
281 memcpy(data, characters, position * sizeof(CharacterType)); | |
282 memcpy(data + position, characters + position + lengthToRemove, | |
283 (length() - lengthToRemove - position) * sizeof(CharacterType)); | |
284 | |
285 m_impl = newImpl.release(); | |
286 } | |
287 | |
288 void String::remove(unsigned position, int lengthToRemove) | |
289 { | |
290 if (lengthToRemove <= 0) | |
291 return; | |
292 if (position >= length()) | |
293 return; | |
294 if (static_cast<unsigned>(lengthToRemove) > length() - position) | |
295 lengthToRemove = length() - position; | |
296 | |
297 if (is8Bit()) { | |
298 removeInternal(characters8(), position, lengthToRemove); | |
299 | |
300 return; | |
301 } | |
302 | |
303 removeInternal(characters16(), position, lengthToRemove); | |
304 } | |
305 | |
306 String String::substring(unsigned pos, unsigned len) const | |
307 { | |
308 if (!m_impl) | |
309 return String(); | |
310 return m_impl->substring(pos, len); | |
311 } | |
312 | |
313 String String::substringSharingImpl(unsigned offset, unsigned length) const | |
314 { | |
315 // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UC
har). | |
316 | |
317 unsigned stringLength = this->length(); | |
318 offset = min(offset, stringLength); | |
319 length = min(length, stringLength - offset); | |
320 | |
321 if (!offset && length == stringLength) | |
322 return *this; | |
323 return String(StringImpl::create(m_impl, offset, length)); | |
324 } | |
325 | |
326 String String::lower() const | |
327 { | |
328 if (!m_impl) | |
329 return String(); | |
330 return m_impl->lower(); | |
331 } | |
332 | |
333 String String::upper() const | |
334 { | |
335 if (!m_impl) | |
336 return String(); | |
337 return m_impl->upper(); | |
338 } | |
339 | |
340 String String::stripWhiteSpace() const | |
341 { | |
342 if (!m_impl) | |
343 return String(); | |
344 return m_impl->stripWhiteSpace(); | |
345 } | |
346 | |
347 String String::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const | |
348 { | |
349 if (!m_impl) | |
350 return String(); | |
351 return m_impl->stripWhiteSpace(isWhiteSpace); | |
352 } | |
353 | |
354 String String::simplifyWhiteSpace() const | |
355 { | |
356 if (!m_impl) | |
357 return String(); | |
358 return m_impl->simplifyWhiteSpace(); | |
359 } | |
360 | |
361 String String::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const | |
362 { | |
363 if (!m_impl) | |
364 return String(); | |
365 return m_impl->simplifyWhiteSpace(isWhiteSpace); | |
366 } | |
367 | |
368 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const | |
369 { | |
370 if (!m_impl) | |
371 return String(); | |
372 return m_impl->removeCharacters(findMatch); | |
373 } | |
374 | |
375 String String::foldCase() const | |
376 { | |
377 if (!m_impl) | |
378 return String(); | |
379 return m_impl->foldCase(); | |
380 } | |
381 | |
382 bool String::percentage(int& result) const | |
383 { | |
384 if (!m_impl || !m_impl->length()) | |
385 return false; | |
386 | |
387 if ((*m_impl)[m_impl->length() - 1] != '%') | |
388 return false; | |
389 | |
390 result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1); | |
391 return true; | |
392 } | |
393 | |
394 const UChar* String::charactersWithNullTermination() | |
395 { | |
396 if (!m_impl) | |
397 return 0; | |
398 if (m_impl->hasTerminatingNullCharacter()) | |
399 return m_impl->characters(); | |
400 m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl); | |
401 return m_impl->characters(); | |
402 } | |
403 | |
404 String String::format(const char *format, ...) | |
405 { | |
406 #if OS(WINCE) | |
407 va_list args; | |
408 va_start(args, format); | |
409 | |
410 Vector<char, 256> buffer; | |
411 | |
412 int bufferSize = 256; | |
413 buffer.resize(bufferSize); | |
414 for (;;) { | |
415 int written = vsnprintf(buffer.data(), bufferSize, format, args); | |
416 va_end(args); | |
417 | |
418 if (written == 0) | |
419 return String(""); | |
420 if (written > 0) | |
421 return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data
()), written); | |
422 | |
423 bufferSize <<= 1; | |
424 buffer.resize(bufferSize); | |
425 va_start(args, format); | |
426 } | |
427 | |
428 #else | |
429 va_list args; | |
430 va_start(args, format); | |
431 | |
432 Vector<char, 256> buffer; | |
433 | |
434 // Do the format once to get the length. | |
435 #if COMPILER(MSVC) | |
436 int result = _vscprintf(format, args); | |
437 #else | |
438 char ch; | |
439 int result = vsnprintf(&ch, 1, format, args); | |
440 // We need to call va_end() and then va_start() again here, as the | |
441 // contents of args is undefined after the call to vsnprintf | |
442 // according to http://man.cx/snprintf(3) | |
443 // | |
444 // Not calling va_end/va_start here happens to work on lots of | |
445 // systems, but fails e.g. on 64bit Linux. | |
446 va_end(args); | |
447 va_start(args, format); | |
448 #endif | |
449 | |
450 if (result == 0) | |
451 return String(""); | |
452 if (result < 0) | |
453 return String(); | |
454 unsigned len = result; | |
455 buffer.grow(len + 1); | |
456 | |
457 // Now do the formatting again, guaranteed to fit. | |
458 vsnprintf(buffer.data(), buffer.size(), format, args); | |
459 | |
460 va_end(args); | |
461 | |
462 return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len
); | |
463 #endif | |
464 } | |
465 | |
466 String String::number(int number) | |
467 { | |
468 return numberToStringSigned<String>(number); | |
469 } | |
470 | |
471 String String::number(unsigned int number) | |
472 { | |
473 return numberToStringUnsigned<String>(number); | |
474 } | |
475 | |
476 String String::number(long number) | |
477 { | |
478 return numberToStringSigned<String>(number); | |
479 } | |
480 | |
481 String String::number(unsigned long number) | |
482 { | |
483 return numberToStringUnsigned<String>(number); | |
484 } | |
485 | |
486 String String::number(long long number) | |
487 { | |
488 return numberToStringSigned<String>(number); | |
489 } | |
490 | |
491 String String::number(unsigned long long number) | |
492 { | |
493 return numberToStringUnsigned<String>(number); | |
494 } | |
495 | |
496 String String::number(double number, unsigned precision, TrailingZerosTruncating
Policy trailingZerosTruncatingPolicy) | |
497 { | |
498 NumberToStringBuffer buffer; | |
499 return String(numberToFixedPrecisionString(number, precision, buffer, traili
ngZerosTruncatingPolicy == TruncateTrailingZeros)); | |
500 } | |
501 | |
502 String String::numberToStringECMAScript(double number) | |
503 { | |
504 NumberToStringBuffer buffer; | |
505 return String(numberToString(number, buffer)); | |
506 } | |
507 | |
508 String String::numberToStringFixedWidth(double number, unsigned decimalPlaces) | |
509 { | |
510 NumberToStringBuffer buffer; | |
511 return String(numberToFixedWidthString(number, decimalPlaces, buffer)); | |
512 } | |
513 | |
514 int String::toIntStrict(bool* ok, int base) const | |
515 { | |
516 if (!m_impl) { | |
517 if (ok) | |
518 *ok = false; | |
519 return 0; | |
520 } | |
521 return m_impl->toIntStrict(ok, base); | |
522 } | |
523 | |
524 unsigned String::toUIntStrict(bool* ok, int base) const | |
525 { | |
526 if (!m_impl) { | |
527 if (ok) | |
528 *ok = false; | |
529 return 0; | |
530 } | |
531 return m_impl->toUIntStrict(ok, base); | |
532 } | |
533 | |
534 int64_t String::toInt64Strict(bool* ok, int base) const | |
535 { | |
536 if (!m_impl) { | |
537 if (ok) | |
538 *ok = false; | |
539 return 0; | |
540 } | |
541 return m_impl->toInt64Strict(ok, base); | |
542 } | |
543 | |
544 uint64_t String::toUInt64Strict(bool* ok, int base) const | |
545 { | |
546 if (!m_impl) { | |
547 if (ok) | |
548 *ok = false; | |
549 return 0; | |
550 } | |
551 return m_impl->toUInt64Strict(ok, base); | |
552 } | |
553 | |
554 intptr_t String::toIntPtrStrict(bool* ok, int base) const | |
555 { | |
556 if (!m_impl) { | |
557 if (ok) | |
558 *ok = false; | |
559 return 0; | |
560 } | |
561 return m_impl->toIntPtrStrict(ok, base); | |
562 } | |
563 | |
564 int String::toInt(bool* ok) const | |
565 { | |
566 if (!m_impl) { | |
567 if (ok) | |
568 *ok = false; | |
569 return 0; | |
570 } | |
571 return m_impl->toInt(ok); | |
572 } | |
573 | |
574 unsigned String::toUInt(bool* ok) const | |
575 { | |
576 if (!m_impl) { | |
577 if (ok) | |
578 *ok = false; | |
579 return 0; | |
580 } | |
581 return m_impl->toUInt(ok); | |
582 } | |
583 | |
584 int64_t String::toInt64(bool* ok) const | |
585 { | |
586 if (!m_impl) { | |
587 if (ok) | |
588 *ok = false; | |
589 return 0; | |
590 } | |
591 return m_impl->toInt64(ok); | |
592 } | |
593 | |
594 uint64_t String::toUInt64(bool* ok) const | |
595 { | |
596 if (!m_impl) { | |
597 if (ok) | |
598 *ok = false; | |
599 return 0; | |
600 } | |
601 return m_impl->toUInt64(ok); | |
602 } | |
603 | |
604 intptr_t String::toIntPtr(bool* ok) const | |
605 { | |
606 if (!m_impl) { | |
607 if (ok) | |
608 *ok = false; | |
609 return 0; | |
610 } | |
611 return m_impl->toIntPtr(ok); | |
612 } | |
613 | |
614 double String::toDouble(bool* ok) const | |
615 { | |
616 if (!m_impl) { | |
617 if (ok) | |
618 *ok = false; | |
619 return 0.0; | |
620 } | |
621 return m_impl->toDouble(ok); | |
622 } | |
623 | |
624 float String::toFloat(bool* ok) const | |
625 { | |
626 if (!m_impl) { | |
627 if (ok) | |
628 *ok = false; | |
629 return 0.0f; | |
630 } | |
631 return m_impl->toFloat(ok); | |
632 } | |
633 | |
634 String String::isolatedCopy() const | |
635 { | |
636 if (!m_impl) | |
637 return String(); | |
638 return m_impl->isolatedCopy(); | |
639 } | |
640 | |
641 bool String::isSafeToSendToAnotherThread() const | |
642 { | |
643 if (!impl()) | |
644 return true; | |
645 // AtomicStrings are not safe to send between threads as ~StringImpl() | |
646 // will try to remove them from the wrong AtomicStringTable. | |
647 if (impl()->isAtomic()) | |
648 return false; | |
649 if (impl()->hasOneRef()) | |
650 return true; | |
651 if (isEmpty()) | |
652 return true; | |
653 return false; | |
654 } | |
655 | |
656 void String::split(const String& separator, bool allowEmptyEntries, Vector<Strin
g>& result) const | |
657 { | |
658 result.clear(); | |
659 | |
660 unsigned startPos = 0; | |
661 size_t endPos; | |
662 while ((endPos = find(separator, startPos)) != notFound) { | |
663 if (allowEmptyEntries || startPos != endPos) | |
664 result.append(substring(startPos, endPos - startPos)); | |
665 startPos = endPos + separator.length(); | |
666 } | |
667 if (allowEmptyEntries || startPos != length()) | |
668 result.append(substring(startPos)); | |
669 } | |
670 | |
671 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& resu
lt) const | |
672 { | |
673 result.clear(); | |
674 | |
675 unsigned startPos = 0; | |
676 size_t endPos; | |
677 while ((endPos = find(separator, startPos)) != notFound) { | |
678 if (allowEmptyEntries || startPos != endPos) | |
679 result.append(substring(startPos, endPos - startPos)); | |
680 startPos = endPos + 1; | |
681 } | |
682 if (allowEmptyEntries || startPos != length()) | |
683 result.append(substring(startPos)); | |
684 } | |
685 | |
686 CString String::ascii() const | |
687 { | |
688 // Printable ASCII characters 32..127 and the null character are | |
689 // preserved, characters outside of this range are converted to '?'. | |
690 | |
691 unsigned length = this->length(); | |
692 if (!length) { | |
693 char* characterBuffer; | |
694 return CString::newUninitialized(length, characterBuffer); | |
695 } | |
696 | |
697 if (this->is8Bit()) { | |
698 const LChar* characters = this->characters8(); | |
699 | |
700 char* characterBuffer; | |
701 CString result = CString::newUninitialized(length, characterBuffer); | |
702 | |
703 for (unsigned i = 0; i < length; ++i) { | |
704 LChar ch = characters[i]; | |
705 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; | |
706 } | |
707 | |
708 return result; | |
709 } | |
710 | |
711 const UChar* characters = this->characters16(); | |
712 | |
713 char* characterBuffer; | |
714 CString result = CString::newUninitialized(length, characterBuffer); | |
715 | |
716 for (unsigned i = 0; i < length; ++i) { | |
717 UChar ch = characters[i]; | |
718 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; | |
719 } | |
720 | |
721 return result; | |
722 } | |
723 | |
724 CString String::latin1() const | |
725 { | |
726 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are | |
727 // preserved, characters outside of this range are converted to '?'. | |
728 | |
729 unsigned length = this->length(); | |
730 | |
731 if (!length) | |
732 return CString("", 0); | |
733 | |
734 if (is8Bit()) | |
735 return CString(reinterpret_cast<const char*>(this->characters8()), lengt
h); | |
736 | |
737 const UChar* characters = this->characters16(); | |
738 | |
739 char* characterBuffer; | |
740 CString result = CString::newUninitialized(length, characterBuffer); | |
741 | |
742 for (unsigned i = 0; i < length; ++i) { | |
743 UChar ch = characters[i]; | |
744 characterBuffer[i] = ch > 0xff ? '?' : ch; | |
745 } | |
746 | |
747 return result; | |
748 } | |
749 | |
750 // Helper to write a three-byte UTF-8 code point to the buffer, caller must chec
k room is available. | |
751 static inline void putUTF8Triple(char*& buffer, UChar ch) | |
752 { | |
753 ASSERT(ch >= 0x0800); | |
754 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); | |
755 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); | |
756 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); | |
757 } | |
758 | |
759 CString String::utf8(ConversionMode mode) const | |
760 { | |
761 unsigned length = this->length(); | |
762 | |
763 if (!length) | |
764 return CString("", 0); | |
765 | |
766 // Allocate a buffer big enough to hold all the characters | |
767 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). | |
768 // Optimization ideas, if we find this function is hot: | |
769 // * We could speculatively create a CStringBuffer to contain 'length' | |
770 // characters, and resize if necessary (i.e. if the buffer contains | |
771 // non-ascii characters). (Alternatively, scan the buffer first for | |
772 // ascii characters, so we know this will be sufficient). | |
773 // * We could allocate a CStringBuffer with an appropriate size to | |
774 // have a good chance of being able to write the string into the | |
775 // buffer without reallocing (say, 1.5 x length). | |
776 if (length > numeric_limits<unsigned>::max() / 3) | |
777 return CString(); | |
778 Vector<char, 1024> bufferVector(length * 3); | |
779 | |
780 char* buffer = bufferVector.data(); | |
781 | |
782 if (is8Bit()) { | |
783 const LChar* characters = this->characters8(); | |
784 | |
785 ConversionResult result = convertLatin1ToUTF8(&characters, characters +
length, &buffer, buffer + bufferVector.size()); | |
786 ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should
be sufficient for any conversion | |
787 } else { | |
788 const UChar* characters = this->characters16(); | |
789 | |
790 if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) { | |
791 const UChar* charactersEnd = characters + length; | |
792 char* bufferEnd = buffer + bufferVector.size(); | |
793 while (characters < charactersEnd) { | |
794 // Use strict conversion to detect unpaired surrogates. | |
795 ConversionResult result = convertUTF16ToUTF8(&characters, charac
tersEnd, &buffer, bufferEnd, true); | |
796 ASSERT(result != targetExhausted); | |
797 // Conversion fails when there is an unpaired surrogate. | |
798 // Put replacement character (U+FFFD) instead of the unpaired su
rrogate. | |
799 if (result != conversionOK) { | |
800 ASSERT((0xD800 <= *characters && *characters <= 0xDFFF)); | |
801 // There should be room left, since one UChar hasn't been co
nverted. | |
802 ASSERT((buffer + 3) <= bufferEnd); | |
803 putUTF8Triple(buffer, replacementCharacter); | |
804 ++characters; | |
805 } | |
806 } | |
807 } else { | |
808 bool strict = mode == StrictConversion; | |
809 ConversionResult result = convertUTF16ToUTF8(&characters, characters
+ length, &buffer, buffer + bufferVector.size(), strict); | |
810 ASSERT(result != targetExhausted); // (length * 3) should be suffici
ent for any conversion | |
811 | |
812 // Only produced from strict conversion. | |
813 if (result == sourceIllegal) { | |
814 ASSERT(strict); | |
815 return CString(); | |
816 } | |
817 | |
818 // Check for an unconverted high surrogate. | |
819 if (result == sourceExhausted) { | |
820 if (strict) | |
821 return CString(); | |
822 // This should be one unpaired high surrogate. Treat it the same | |
823 // was as an unpaired high surrogate would have been handled in | |
824 // the middle of a string with non-strict conversion - which is | |
825 // to say, simply encode it to UTF-8. | |
826 ASSERT((characters + 1) == (this->characters() + length)); | |
827 ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF)); | |
828 // There should be room left, since one UChar hasn't been conver
ted. | |
829 ASSERT((buffer + 3) <= (buffer + bufferVector.size())); | |
830 putUTF8Triple(buffer, *characters); | |
831 } | |
832 } | |
833 } | |
834 | |
835 return CString(bufferVector.data(), buffer - bufferVector.data()); | |
836 } | |
837 | |
838 String String::make8BitFrom16BitSource(const UChar* source, size_t length) | |
839 { | |
840 if (!length) | |
841 return String(); | |
842 | |
843 LChar* destination; | |
844 String result = String::createUninitialized(length, destination); | |
845 | |
846 copyLCharsFromUCharSource(destination, source, length); | |
847 | |
848 return result; | |
849 } | |
850 | |
851 String String::make16BitFrom8BitSource(const LChar* source, size_t length) | |
852 { | |
853 if (!length) | |
854 return String(); | |
855 | |
856 UChar* destination; | |
857 String result = String::createUninitialized(length, destination); | |
858 | |
859 StringImpl::copyChars(destination, source, length); | |
860 | |
861 return result; | |
862 } | |
863 | |
864 String String::fromUTF8(const LChar* stringStart, size_t length) | |
865 { | |
866 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); | |
867 | |
868 if (!stringStart) | |
869 return String(); | |
870 | |
871 if (!length) | |
872 return emptyString(); | |
873 | |
874 // We'll use a StringImpl as a buffer; if the source string only contains as
cii this should be | |
875 // the right length, if there are any multi-byte sequences this buffer will
be too large. | |
876 UChar* buffer; | |
877 String stringBuffer(StringImpl::createUninitialized(length, buffer)); | |
878 UChar* bufferEnd = buffer + length; | |
879 | |
880 // Try converting into the buffer. | |
881 const char* stringCurrent = reinterpret_cast<const char*>(stringStart); | |
882 bool isAllASCII; | |
883 if (convertUTF8ToUTF16(&stringCurrent, reinterpret_cast<const char *>(string
Start + length), &buffer, bufferEnd, &isAllASCII) != conversionOK) | |
884 return String(); | |
885 | |
886 if (isAllASCII) | |
887 return String(stringStart, length); | |
888 | |
889 // stringBuffer is full (the input must have been all ascii) so just return
it! | |
890 if (buffer == bufferEnd) | |
891 return stringBuffer; | |
892 | |
893 // stringBuffer served its purpose as a buffer, copy the contents out into a
new string. | |
894 unsigned utf16Length = buffer - stringBuffer.characters(); | |
895 ASSERT(utf16Length < length); | |
896 return String(stringBuffer.characters(), utf16Length); | |
897 } | |
898 | |
899 String String::fromUTF8(const LChar* string) | |
900 { | |
901 if (!string) | |
902 return String(); | |
903 return fromUTF8(string, strlen(reinterpret_cast<const char*>(string))); | |
904 } | |
905 | |
906 String String::fromUTF8(const CString& s) | |
907 { | |
908 return fromUTF8(s.data()); | |
909 } | |
910 | |
911 String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size) | |
912 { | |
913 String utf8 = fromUTF8(string, size); | |
914 if (!utf8) | |
915 return String(string, size); | |
916 return utf8; | |
917 } | |
918 | |
919 // String Operations | |
920 | |
921 static bool isCharacterAllowedInBase(UChar c, int base) | |
922 { | |
923 if (c > 0x7F) | |
924 return false; | |
925 if (isASCIIDigit(c)) | |
926 return c - '0' < base; | |
927 if (isASCIIAlpha(c)) { | |
928 if (base > 36) | |
929 base = 36; | |
930 return (c >= 'a' && c < 'a' + base - 10) | |
931 || (c >= 'A' && c < 'A' + base - 10); | |
932 } | |
933 return false; | |
934 } | |
935 | |
// Parses an integer of type IntegralType from |length| characters at |data|.
//
// Accepted syntax: optional leading whitespace (isSpaceOrNewline), an optional
// sign ('+' for any type, '-' only for signed types), one or more digits that
// are valid in |base| (see isCharacterAllowedInBase), and optional trailing
// whitespace. Anything else, including an empty digit sequence or trailing
// junk, is a failure.
//
// |ok| may be null; when it is not, it receives true on success and false on
// failure. The return value is the parsed number on success and 0 on failure.
//
// Failure is also reported when |data| is null, when |base| is not positive,
// or when the number does not fit into IntegralType.
//
// The magnitude is accumulated in a uint64_t and compared against the exact
// limit for the sign that was parsed. This avoids two problems of
// accumulating directly in IntegralType: signed overflow while building the
// magnitude of the minimum value, and out-of-range input such as
// "-21474836480" slipping past the range check once the accumulator had
// wrapped. The minimum value of a signed type is accepted in every base.
template <typename IntegralType, typename CharType>
static inline IntegralType toIntegralType(const CharType* data, size_t length, bool* ok, int base)
{
    static const IntegralType integralMax = std::numeric_limits<IntegralType>::max();
    static const IntegralType integralMin = std::numeric_limits<IntegralType>::min();
    static const bool isSigned = std::numeric_limits<IntegralType>::is_signed;

    // Everything is declared up front so that no "goto bye" jumps over an
    // initialization.
    IntegralType value = 0;
    uint64_t magnitude = 0;
    uint64_t magnitudeLimit = 0;
    uint64_t unsignedBase = 0;
    bool isOk = false;
    bool isNegative = false;

    // A non-positive base has no valid digits and would make the range check
    // below divide by zero.
    if (!data || base <= 0)
        goto bye;
    unsignedBase = static_cast<uint64_t>(base);

    // skip leading whitespace
    while (length && isSpaceOrNewline(*data)) {
        --length;
        ++data;
    }

    if (isSigned && length && *data == '-') {
        --length;
        ++data;
        isNegative = true;
    } else if (length && *data == '+') {
        --length;
        ++data;
    }

    // At least one digit is required.
    if (!length || !isCharacterAllowedInBase(*data, base))
        goto bye;

    // A negative number may have a magnitude one larger than the maximum
    // (two's-complement minimum). isNegative is only ever set for signed
    // types, so the addition cannot wrap for uint64_t.
    magnitudeLimit = static_cast<uint64_t>(integralMax) + (isNegative ? 1 : 0);

    while (length && isCharacterAllowedInBase(*data, base)) {
        --length;
        uint64_t digitValue;
        CharType c = *data;
        if (isASCIIDigit(c))
            digitValue = c - '0';
        else if (c >= 'a')
            digitValue = c - 'a' + 10;
        else
            digitValue = c - 'A' + 10;

        // Exact test for "magnitude * base + digitValue > magnitudeLimit",
        // written so that nothing overflows. digitValue is always far below
        // magnitudeLimit, so the subtraction cannot wrap.
        if (magnitude > (magnitudeLimit - digitValue) / unsignedBase)
            goto bye;

        magnitude = unsignedBase * magnitude + digitValue;
        ++data;
    }

    if (!isNegative)
        value = static_cast<IntegralType>(magnitude);
    else if (magnitude == magnitudeLimit)
        value = integralMin; // The magnitude itself is not representable as a positive value.
    else
        value = static_cast<IntegralType>(0) - static_cast<IntegralType>(magnitude);

    // skip trailing space
    while (length && isSpaceOrNewline(*data)) {
        --length;
        ++data;
    }

    // Success only if the whole input has been consumed.
    if (!length)
        isOk = true;
bye:
    if (ok)
        *ok = isOk;
    return isOk ? value : 0;
}
1011 | |
// Returns the length of the longest prefix of |data| (at most |length|
// characters) that looks like a decimal integer: leading whitespace
// (isSpaceOrNewline), an optional '+' or '-', then ASCII digits. The prefix
// may contain no digits at all; validation of the content is left to the
// caller.
//
// Returns 0 for a null |data| so that callers can pass the pointer on to a
// parser that reports null input as a failure instead of dereferencing it
// here. The result is a size_t, matching the type of |length|.
template <typename CharType>
static size_t lengthOfCharactersAsInteger(const CharType* data, size_t length)
{
    if (!data)
        return 0;

    size_t i = 0;

    // Allow leading spaces.
    while (i != length && isSpaceOrNewline(data[i]))
        ++i;

    // Allow sign.
    if (i != length && (data[i] == '+' || data[i] == '-'))
        ++i;

    // Allow digits.
    while (i != length && isASCIIDigit(data[i]))
        ++i;

    return i;
}
1035 | |
1036 int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base) | |
1037 { | |
1038 return toIntegralType<int, LChar>(data, length, ok, base); | |
1039 } | |
1040 | |
1041 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base) | |
1042 { | |
1043 return toIntegralType<int, UChar>(data, length, ok, base); | |
1044 } | |
1045 | |
1046 unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int
base) | |
1047 { | |
1048 return toIntegralType<unsigned, LChar>(data, length, ok, base); | |
1049 } | |
1050 | |
1051 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int
base) | |
1052 { | |
1053 return toIntegralType<unsigned, UChar>(data, length, ok, base); | |
1054 } | |
1055 | |
1056 int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int
base) | |
1057 { | |
1058 return toIntegralType<int64_t, LChar>(data, length, ok, base); | |
1059 } | |
1060 | |
1061 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int
base) | |
1062 { | |
1063 return toIntegralType<int64_t, UChar>(data, length, ok, base); | |
1064 } | |
1065 | |
1066 uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, in
t base) | |
1067 { | |
1068 return toIntegralType<uint64_t, LChar>(data, length, ok, base); | |
1069 } | |
1070 | |
1071 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, in
t base) | |
1072 { | |
1073 return toIntegralType<uint64_t, UChar>(data, length, ok, base); | |
1074 } | |
1075 | |
1076 intptr_t charactersToIntPtrStrict(const LChar* data, size_t length, bool* ok, in
t base) | |
1077 { | |
1078 return toIntegralType<intptr_t, LChar>(data, length, ok, base); | |
1079 } | |
1080 | |
1081 intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, in
t base) | |
1082 { | |
1083 return toIntegralType<intptr_t, UChar>(data, length, ok, base); | |
1084 } | |
1085 | |
1086 int charactersToInt(const LChar* data, size_t length, bool* ok) | |
1087 { | |
1088 return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(d
ata, length), ok, 10); | |
1089 } | |
1090 | |
1091 int charactersToInt(const UChar* data, size_t length, bool* ok) | |
1092 { | |
1093 return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger(data, le
ngth), ok, 10); | |
1094 } | |
1095 | |
1096 unsigned charactersToUInt(const LChar* data, size_t length, bool* ok) | |
1097 { | |
1098 return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LCh
ar>(data, length), ok, 10); | |
1099 } | |
1100 | |
1101 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok) | |
1102 { | |
1103 return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UCh
ar>(data, length), ok, 10); | |
1104 } | |
1105 | |
1106 int64_t charactersToInt64(const LChar* data, size_t length, bool* ok) | |
1107 { | |
1108 return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LCha
r>(data, length), ok, 10); | |
1109 } | |
1110 | |
1111 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok) | |
1112 { | |
1113 return toIntegralType<int64_t, UChar>(data, lengthOfCharactersAsInteger<UCha
r>(data, length), ok, 10); | |
1114 } | |
1115 | |
1116 uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok) | |
1117 { | |
1118 return toIntegralType<uint64_t, LChar>(data, lengthOfCharactersAsInteger<LCh
ar>(data, length), ok, 10); | |
1119 } | |
1120 | |
1121 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok) | |
1122 { | |
1123 return toIntegralType<uint64_t, UChar>(data, lengthOfCharactersAsInteger<UCh
ar>(data, length), ok, 10); | |
1124 } | |
1125 | |
1126 intptr_t charactersToIntPtr(const LChar* data, size_t length, bool* ok) | |
1127 { | |
1128 return toIntegralType<intptr_t, LChar>(data, lengthOfCharactersAsInteger<LCh
ar>(data, length), ok, 10); | |
1129 } | |
1130 | |
1131 intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok) | |
1132 { | |
1133 return toIntegralType<intptr_t, UChar>(data, lengthOfCharactersAsInteger<UCh
ar>(data, length), ok, 10); | |
1134 } | |
1135 | |
// Selects how toDoubleType() reports input that continues after the parsed
// number.
enum TrailingJunkPolicy {
    // |ok| is true only if the whole input was consumed.
    DisallowTrailingJunk,
    // |ok| is true as soon as a number was parsed, whatever follows it.
    AllowTrailingJunk
};
1137 | |
1138 template <typename CharType, TrailingJunkPolicy policy> | |
1139 static inline double toDoubleType(const CharType* data, size_t length, bool* ok,
size_t& parsedLength) | |
1140 { | |
1141 size_t leadingSpacesLength = 0; | |
1142 while (leadingSpacesLength < length && isASCIISpace(data[leadingSpacesLength
])) | |
1143 ++leadingSpacesLength; | |
1144 | |
1145 double number = parseDouble(data + leadingSpacesLength, length - leadingSpac
esLength, parsedLength); | |
1146 if (!parsedLength) { | |
1147 if (ok) | |
1148 *ok = false; | |
1149 return 0.0; | |
1150 } | |
1151 | |
1152 parsedLength += leadingSpacesLength; | |
1153 if (ok) | |
1154 *ok = policy == AllowTrailingJunk || parsedLength == length; | |
1155 return number; | |
1156 } | |
1157 | |
1158 double charactersToDouble(const LChar* data, size_t length, bool* ok) | |
1159 { | |
1160 size_t parsedLength; | |
1161 return toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLen
gth); | |
1162 } | |
1163 | |
1164 double charactersToDouble(const UChar* data, size_t length, bool* ok) | |
1165 { | |
1166 size_t parsedLength; | |
1167 return toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLen
gth); | |
1168 } | |
1169 | |
1170 float charactersToFloat(const LChar* data, size_t length, bool* ok) | |
1171 { | |
1172 // FIXME: This will return ok even when the string fits into a double but no
t a float. | |
1173 size_t parsedLength; | |
1174 return static_cast<float>(toDoubleType<LChar, DisallowTrailingJunk>(data, le
ngth, ok, parsedLength)); | |
1175 } | |
1176 | |
1177 float charactersToFloat(const UChar* data, size_t length, bool* ok) | |
1178 { | |
1179 // FIXME: This will return ok even when the string fits into a double but no
t a float. | |
1180 size_t parsedLength; | |
1181 return static_cast<float>(toDoubleType<UChar, DisallowTrailingJunk>(data, le
ngth, ok, parsedLength)); | |
1182 } | |
1183 | |
1184 float charactersToFloat(const LChar* data, size_t length, size_t& parsedLength) | |
1185 { | |
1186 // FIXME: This will return ok even when the string fits into a double but no
t a float. | |
1187 return static_cast<float>(toDoubleType<LChar, AllowTrailingJunk>(data, lengt
h, 0, parsedLength)); | |
1188 } | |
1189 | |
1190 float charactersToFloat(const UChar* data, size_t length, size_t& parsedLength) | |
1191 { | |
1192 // FIXME: This will return ok even when the string fits into a double but no
t a float. | |
1193 return static_cast<float>(toDoubleType<UChar, AllowTrailingJunk>(data, lengt
h, 0, parsedLength)); | |
1194 } | |
1195 | |
1196 const String& emptyString() | |
1197 { | |
1198 DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty())); | |
1199 return emptyString; | |
1200 } | |
1201 | |
1202 } // namespace WTF | |
1203 | |
1204 #ifndef NDEBUG | |
1205 // For use in the debugger | |
1206 String* string(const char*); | |
1207 Vector<char> asciiDebug(StringImpl* impl); | |
1208 Vector<char> asciiDebug(String& string); | |
1209 | |
1210 void String::show() const | |
1211 { | |
1212 dataLogF("%s\n", asciiDebug(impl()).data()); | |
1213 } | |
1214 | |
1215 String* string(const char* s) | |
1216 { | |
1217 // leaks memory! | |
1218 return new String(s); | |
1219 } | |
1220 | |
1221 Vector<char> asciiDebug(StringImpl* impl) | |
1222 { | |
1223 if (!impl) | |
1224 return asciiDebug(String("[null]").impl()); | |
1225 | |
1226 Vector<char> buffer; | |
1227 for (unsigned i = 0; i < impl->length(); ++i) { | |
1228 UChar ch = (*impl)[i]; | |
1229 if (isASCIIPrintable(ch)) { | |
1230 if (ch == '\\') | |
1231 buffer.append(ch); | |
1232 buffer.append(ch); | |
1233 } else { | |
1234 buffer.append('\\'); | |
1235 buffer.append('u'); | |
1236 appendUnsignedAsHexFixedSize(ch, buffer, 4); | |
1237 } | |
1238 } | |
1239 buffer.append('\0'); | |
1240 return buffer; | |
1241 } | |
1242 | |
1243 Vector<char> asciiDebug(String& string) | |
1244 { | |
1245 return asciiDebug(string.impl()); | |
1246 } | |
1247 | |
1248 #endif | |
OLD | NEW |