Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1283)

Side by Side Diff: third_party/WebKit/Source/wtf/text/WTFString.cpp

Issue 2764243002: Move files in wtf/ to platform/wtf/ (Part 9). (Closed)
Patch Set: Rebase. Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/WebKit/Source/wtf/text/WTFString.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights
4 * reserved.
5 * Copyright (C) 2007-2009 Torch Mobile, Inc.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 */
22
23 #include "wtf/text/WTFString.h"
24
25 #include "base/strings/string_util.h"
26 #include "wtf/ASCIICType.h"
27 #include "wtf/DataLog.h"
28 #include "wtf/HexNumber.h"
29 #include "wtf/MathExtras.h"
30 #include "wtf/StringExtras.h"
31 #include "wtf/Vector.h"
32 #include "wtf/dtoa.h"
33 #include "wtf/text/CString.h"
34 #include "wtf/text/CharacterNames.h"
35 #include "wtf/text/IntegerToStringConversion.h"
36 #include "wtf/text/UTF8.h"
37 #include "wtf/text/Unicode.h"
38 #include <algorithm>
39 #include <stdarg.h>
40
41 namespace WTF {
42
43 using namespace Unicode;
44
45 namespace {
46
47 Vector<char> asciiDebug(StringImpl* impl) {
48 if (!impl)
49 return asciiDebug(String("[null]").impl());
50
51 Vector<char> buffer;
52 for (unsigned i = 0; i < impl->length(); ++i) {
53 UChar ch = (*impl)[i];
54 if (isASCIIPrintable(ch)) {
55 if (ch == '\\')
56 buffer.push_back('\\');
57 buffer.push_back(static_cast<char>(ch));
58 } else {
59 buffer.push_back('\\');
60 buffer.push_back('u');
61 HexNumber::appendUnsignedAsHexFixedSize(ch, buffer, 4);
62 }
63 }
64 buffer.push_back('\0');
65 return buffer;
66 }
67
68 } // namespace
69
70 // Construct a string with UTF-16 data.
71 String::String(const UChar* characters, unsigned length)
72 : m_impl(characters ? StringImpl::create(characters, length) : nullptr) {}
73
74 // Construct a string with UTF-16 data, from a null-terminated source.
75 String::String(const UChar* str) {
76 if (!str)
77 return;
78 m_impl = StringImpl::create(str, lengthOfNullTerminatedString(str));
79 }
80
81 // Construct a string with latin1 data.
82 String::String(const LChar* characters, unsigned length)
83 : m_impl(characters ? StringImpl::create(characters, length) : nullptr) {}
84
85 String::String(const char* characters, unsigned length)
86 : m_impl(characters ? StringImpl::create(
87 reinterpret_cast<const LChar*>(characters),
88 length)
89 : nullptr) {}
90
91 void String::append(const StringView& string) {
92 if (string.isEmpty())
93 return;
94 if (!m_impl) {
95 m_impl = string.toString().releaseImpl();
96 return;
97 }
98
99 // FIXME: This is extremely inefficient. So much so that we might want to
100 // take this out of String's API. We can make it better by optimizing the
101 // case where exactly one String is pointing at this StringImpl, but even
102 // then it's going to require a call into the allocator every single time.
103
104 if (m_impl->is8Bit() && string.is8Bit()) {
105 LChar* data;
106 RELEASE_ASSERT(string.length() <=
107 std::numeric_limits<unsigned>::max() - m_impl->length());
108 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(
109 m_impl->length() + string.length(), data);
110 memcpy(data, m_impl->characters8(), m_impl->length() * sizeof(LChar));
111 memcpy(data + m_impl->length(), string.characters8(),
112 string.length() * sizeof(LChar));
113 m_impl = newImpl.release();
114 return;
115 }
116
117 UChar* data;
118 RELEASE_ASSERT(string.length() <=
119 std::numeric_limits<unsigned>::max() - m_impl->length());
120 RefPtr<StringImpl> newImpl =
121 StringImpl::createUninitialized(m_impl->length() + string.length(), data);
122
123 if (m_impl->is8Bit())
124 StringImpl::copyChars(data, m_impl->characters8(), m_impl->length());
125 else
126 StringImpl::copyChars(data, m_impl->characters16(), m_impl->length());
127
128 if (string.is8Bit())
129 StringImpl::copyChars(data + m_impl->length(), string.characters8(),
130 string.length());
131 else
132 StringImpl::copyChars(data + m_impl->length(), string.characters16(),
133 string.length());
134
135 m_impl = newImpl.release();
136 }
137
138 template <typename CharacterType>
139 inline void String::appendInternal(CharacterType c) {
140 // FIXME: This is extremely inefficient. So much so that we might want to
141 // take this out of String's API. We can make it better by optimizing the
142 // case where exactly one String is pointing at this StringImpl, but even
143 // then it's going to require a call into the allocator every single time.
144 if (!m_impl) {
145 m_impl = StringImpl::create(&c, 1);
146 return;
147 }
148
149 // FIXME: We should be able to create an 8 bit string via this code path.
150 UChar* data;
151 RELEASE_ASSERT(m_impl->length() < std::numeric_limits<unsigned>::max());
152 RefPtr<StringImpl> newImpl =
153 StringImpl::createUninitialized(m_impl->length() + 1, data);
154 if (m_impl->is8Bit())
155 StringImpl::copyChars(data, m_impl->characters8(), m_impl->length());
156 else
157 StringImpl::copyChars(data, m_impl->characters16(), m_impl->length());
158 data[m_impl->length()] = c;
159 m_impl = newImpl.release();
160 }
161
162 void String::append(LChar c) {
163 appendInternal(c);
164 }
165
166 void String::append(UChar c) {
167 appendInternal(c);
168 }
169
170 int codePointCompare(const String& a, const String& b) {
171 return codePointCompare(a.impl(), b.impl());
172 }
173
174 int codePointCompareIgnoringASCIICase(const String& a, const char* b) {
175 return codePointCompareIgnoringASCIICase(a.impl(),
176 reinterpret_cast<const LChar*>(b));
177 }
178
179 template <typename CharType>
180 PassRefPtr<StringImpl> insertInternal(PassRefPtr<StringImpl> impl,
181 const CharType* charactersToInsert,
182 unsigned lengthToInsert,
183 unsigned position) {
184 if (!lengthToInsert)
185 return impl;
186
187 DCHECK(charactersToInsert);
188 UChar* data; // FIXME: We should be able to create an 8 bit string here.
189 RELEASE_ASSERT(lengthToInsert <=
190 std::numeric_limits<unsigned>::max() - impl->length());
191 RefPtr<StringImpl> newImpl =
192 StringImpl::createUninitialized(impl->length() + lengthToInsert, data);
193
194 if (impl->is8Bit())
195 StringImpl::copyChars(data, impl->characters8(), position);
196 else
197 StringImpl::copyChars(data, impl->characters16(), position);
198
199 StringImpl::copyChars(data + position, charactersToInsert, lengthToInsert);
200
201 if (impl->is8Bit())
202 StringImpl::copyChars(data + position + lengthToInsert,
203 impl->characters8() + position,
204 impl->length() - position);
205 else
206 StringImpl::copyChars(data + position + lengthToInsert,
207 impl->characters16() + position,
208 impl->length() - position);
209
210 return newImpl.release();
211 }
212
213 void String::insert(const StringView& string, unsigned position) {
214 if (string.isEmpty()) {
215 if (string.isNull())
216 return;
217 if (isNull())
218 m_impl = string.toString().releaseImpl();
219 return;
220 }
221
222 if (position >= length()) {
223 if (string.is8Bit())
224 append(string);
225 else
226 append(string);
227 return;
228 }
229
230 DCHECK(m_impl);
231 if (string.is8Bit())
232 m_impl = insertInternal(m_impl.release(), string.characters8(),
233 string.length(), position);
234 else
235 m_impl = insertInternal(m_impl.release(), string.characters16(),
236 string.length(), position);
237 }
238
239 UChar32 String::characterStartingAt(unsigned i) const {
240 if (!m_impl || i >= m_impl->length())
241 return 0;
242 return m_impl->characterStartingAt(i);
243 }
244
245 void String::ensure16Bit() {
246 if (isNull())
247 return;
248 if (!is8Bit())
249 return;
250 if (unsigned length = this->length())
251 m_impl =
252 make16BitFrom8BitSource(m_impl->characters8(), length).releaseImpl();
253 else
254 m_impl = StringImpl::empty16Bit;
255 }
256
257 void String::truncate(unsigned length) {
258 if (m_impl)
259 m_impl = m_impl->truncate(length);
260 }
261
262 void String::remove(unsigned start, unsigned lengthToRemove) {
263 if (m_impl)
264 m_impl = m_impl->remove(start, lengthToRemove);
265 }
266
267 String String::substring(unsigned pos, unsigned len) const {
268 if (!m_impl)
269 return String();
270 return m_impl->substring(pos, len);
271 }
272
273 String String::lower() const {
274 if (!m_impl)
275 return String();
276 return m_impl->lower();
277 }
278
279 String String::upper() const {
280 if (!m_impl)
281 return String();
282 return m_impl->upper();
283 }
284
285 String String::lower(const AtomicString& localeIdentifier) const {
286 if (!m_impl)
287 return String();
288 return m_impl->lower(localeIdentifier);
289 }
290
291 String String::upper(const AtomicString& localeIdentifier) const {
292 if (!m_impl)
293 return String();
294 return m_impl->upper(localeIdentifier);
295 }
296
297 String String::upperASCII() const {
298 if (!m_impl)
299 return String();
300 return m_impl->upperASCII();
301 }
302
303 String String::stripWhiteSpace() const {
304 if (!m_impl)
305 return String();
306 return m_impl->stripWhiteSpace();
307 }
308
309 String String::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const {
310 if (!m_impl)
311 return String();
312 return m_impl->stripWhiteSpace(isWhiteSpace);
313 }
314
315 String String::simplifyWhiteSpace(StripBehavior stripBehavior) const {
316 if (!m_impl)
317 return String();
318 return m_impl->simplifyWhiteSpace(stripBehavior);
319 }
320
321 String String::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace,
322 StripBehavior stripBehavior) const {
323 if (!m_impl)
324 return String();
325 return m_impl->simplifyWhiteSpace(isWhiteSpace, stripBehavior);
326 }
327
328 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const {
329 if (!m_impl)
330 return String();
331 return m_impl->removeCharacters(findMatch);
332 }
333
334 String String::foldCase() const {
335 if (!m_impl)
336 return String();
337 return m_impl->foldCase();
338 }
339
340 String String::format(const char* format, ...) {
341 va_list args;
342
343 // TODO(esprehn): base uses 1024, maybe we should use a bigger size too.
344 static const unsigned kDefaultSize = 256;
345 Vector<char, kDefaultSize> buffer(kDefaultSize);
346
347 va_start(args, format);
348 int length = base::vsnprintf(buffer.data(), buffer.size(), format, args);
349 va_end(args);
350
351 // TODO(esprehn): This can only happen if there's an encoding error, what's
352 // the locale set to inside blink? Can this happen? We should probably CHECK
353 // instead.
354 if (length < 0)
355 return String();
356
357 if (static_cast<unsigned>(length) >= buffer.size()) {
358 // vsnprintf doesn't include the NUL terminator in the length so we need to
359 // add space for it when growing.
360 buffer.grow(length + 1);
361
362 // We need to call va_end() and then va_start() each time we use args, as
363 // the contents of args is undefined after the call to vsnprintf according
364 // to http://man.cx/snprintf(3)
365 //
366 // Not calling va_end/va_start here happens to work on lots of systems, but
367 // fails e.g. on 64bit Linux.
368 va_start(args, format);
369 length = base::vsnprintf(buffer.data(), buffer.size(), format, args);
370 va_end(args);
371 }
372
373 CHECK_LT(static_cast<unsigned>(length), buffer.size());
374 return String(reinterpret_cast<const LChar*>(buffer.data()), length);
375 }
376
377 template <typename IntegerType>
378 static String integerToString(IntegerType input) {
379 IntegerToStringConverter<IntegerType> converter(input);
380 return StringImpl::create(converter.characters8(), converter.length());
381 }
382
383 String String::number(int number) {
384 return integerToString(number);
385 }
386
387 String String::number(unsigned number) {
388 return integerToString(number);
389 }
390
391 String String::number(long number) {
392 return integerToString(number);
393 }
394
395 String String::number(unsigned long number) {
396 return integerToString(number);
397 }
398
399 String String::number(long long number) {
400 return integerToString(number);
401 }
402
403 String String::number(unsigned long long number) {
404 return integerToString(number);
405 }
406
407 String String::number(double number, unsigned precision) {
408 NumberToStringBuffer buffer;
409 return String(numberToFixedPrecisionString(number, precision, buffer));
410 }
411
412 String String::numberToStringECMAScript(double number) {
413 NumberToStringBuffer buffer;
414 return String(numberToString(number, buffer));
415 }
416
417 String String::numberToStringFixedWidth(double number, unsigned decimalPlaces) {
418 NumberToStringBuffer buffer;
419 return String(numberToFixedWidthString(number, decimalPlaces, buffer));
420 }
421
422 int String::toIntStrict(bool* ok, int base) const {
423 if (!m_impl) {
424 if (ok)
425 *ok = false;
426 return 0;
427 }
428 return m_impl->toIntStrict(ok, base);
429 }
430
431 unsigned String::toUIntStrict(bool* ok, int base) const {
432 if (!m_impl) {
433 if (ok)
434 *ok = false;
435 return 0;
436 }
437 return m_impl->toUIntStrict(ok, base);
438 }
439
440 int64_t String::toInt64Strict(bool* ok, int base) const {
441 if (!m_impl) {
442 if (ok)
443 *ok = false;
444 return 0;
445 }
446 return m_impl->toInt64Strict(ok, base);
447 }
448
449 uint64_t String::toUInt64Strict(bool* ok, int base) const {
450 if (!m_impl) {
451 if (ok)
452 *ok = false;
453 return 0;
454 }
455 return m_impl->toUInt64Strict(ok, base);
456 }
457
458 int String::toInt(bool* ok) const {
459 if (!m_impl) {
460 if (ok)
461 *ok = false;
462 return 0;
463 }
464 return m_impl->toInt(ok);
465 }
466
467 unsigned String::toUInt(bool* ok) const {
468 if (!m_impl) {
469 if (ok)
470 *ok = false;
471 return 0;
472 }
473 return m_impl->toUInt(ok);
474 }
475
476 int64_t String::toInt64(bool* ok) const {
477 if (!m_impl) {
478 if (ok)
479 *ok = false;
480 return 0;
481 }
482 return m_impl->toInt64(ok);
483 }
484
485 uint64_t String::toUInt64(bool* ok) const {
486 if (!m_impl) {
487 if (ok)
488 *ok = false;
489 return 0;
490 }
491 return m_impl->toUInt64(ok);
492 }
493
494 double String::toDouble(bool* ok) const {
495 if (!m_impl) {
496 if (ok)
497 *ok = false;
498 return 0.0;
499 }
500 return m_impl->toDouble(ok);
501 }
502
503 float String::toFloat(bool* ok) const {
504 if (!m_impl) {
505 if (ok)
506 *ok = false;
507 return 0.0f;
508 }
509 return m_impl->toFloat(ok);
510 }
511
512 String String::isolatedCopy() const {
513 if (!m_impl)
514 return String();
515 return m_impl->isolatedCopy();
516 }
517
518 bool String::isSafeToSendToAnotherThread() const {
519 return !m_impl || m_impl->isSafeToSendToAnotherThread();
520 }
521
522 void String::split(const StringView& separator,
523 bool allowEmptyEntries,
524 Vector<String>& result) const {
525 result.clear();
526
527 unsigned startPos = 0;
528 size_t endPos;
529 while ((endPos = find(separator, startPos)) != kNotFound) {
530 if (allowEmptyEntries || startPos != endPos)
531 result.push_back(substring(startPos, endPos - startPos));
532 startPos = endPos + separator.length();
533 }
534 if (allowEmptyEntries || startPos != length())
535 result.push_back(substring(startPos));
536 }
537
538 void String::split(UChar separator,
539 bool allowEmptyEntries,
540 Vector<String>& result) const {
541 result.clear();
542
543 unsigned startPos = 0;
544 size_t endPos;
545 while ((endPos = find(separator, startPos)) != kNotFound) {
546 if (allowEmptyEntries || startPos != endPos)
547 result.push_back(substring(startPos, endPos - startPos));
548 startPos = endPos + 1;
549 }
550 if (allowEmptyEntries || startPos != length())
551 result.push_back(substring(startPos));
552 }
553
554 CString String::ascii() const {
555 // Printable ASCII characters 32..127 and the null character are
556 // preserved, characters outside of this range are converted to '?'.
557
558 unsigned length = this->length();
559 if (!length) {
560 char* characterBuffer;
561 return CString::createUninitialized(length, characterBuffer);
562 }
563
564 if (this->is8Bit()) {
565 const LChar* characters = this->characters8();
566
567 char* characterBuffer;
568 CString result = CString::createUninitialized(length, characterBuffer);
569
570 for (unsigned i = 0; i < length; ++i) {
571 LChar ch = characters[i];
572 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
573 }
574
575 return result;
576 }
577
578 const UChar* characters = this->characters16();
579
580 char* characterBuffer;
581 CString result = CString::createUninitialized(length, characterBuffer);
582
583 for (unsigned i = 0; i < length; ++i) {
584 UChar ch = characters[i];
585 characterBuffer[i] =
586 ch && (ch < 0x20 || ch > 0x7f) ? '?' : static_cast<char>(ch);
587 }
588
589 return result;
590 }
591
592 CString String::latin1() const {
593 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
594 // preserved, characters outside of this range are converted to '?'.
595
596 unsigned length = this->length();
597
598 if (!length)
599 return CString("", 0);
600
601 if (is8Bit())
602 return CString(reinterpret_cast<const char*>(this->characters8()), length);
603
604 const UChar* characters = this->characters16();
605
606 char* characterBuffer;
607 CString result = CString::createUninitialized(length, characterBuffer);
608
609 for (unsigned i = 0; i < length; ++i) {
610 UChar ch = characters[i];
611 characterBuffer[i] = ch > 0xff ? '?' : static_cast<char>(ch);
612 }
613
614 return result;
615 }
616
617 // Helper to write a three-byte UTF-8 code point to the buffer, caller must
618 // check room is available.
619 static inline void putUTF8Triple(char*& buffer, UChar ch) {
620 DCHECK_GE(ch, 0x0800);
621 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
622 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
623 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
624 }
625
626 CString String::utf8(UTF8ConversionMode mode) const {
627 unsigned length = this->length();
628
629 if (!length)
630 return CString("", 0);
631
632 // Allocate a buffer big enough to hold all the characters
633 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
634 // Optimization ideas, if we find this function is hot:
635 // * We could speculatively create a CStringImpl to contain 'length'
636 // characters, and resize if necessary (i.e. if the buffer contains
637 // non-ascii characters). (Alternatively, scan the buffer first for
638 // ascii characters, so we know this will be sufficient).
639 // * We could allocate a CStringImpl with an appropriate size to
640 // have a good chance of being able to write the string into the
641 // buffer without reallocing (say, 1.5 x length).
642 if (length > std::numeric_limits<unsigned>::max() / 3)
643 return CString();
644 Vector<char, 1024> bufferVector(length * 3);
645
646 char* buffer = bufferVector.data();
647
648 if (is8Bit()) {
649 const LChar* characters = this->characters8();
650
651 ConversionResult result =
652 convertLatin1ToUTF8(&characters, characters + length, &buffer,
653 buffer + bufferVector.size());
654 // (length * 3) should be sufficient for any conversion
655 DCHECK_NE(result, targetExhausted);
656 } else {
657 const UChar* characters = this->characters16();
658
659 if (mode == StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD) {
660 const UChar* charactersEnd = characters + length;
661 char* bufferEnd = buffer + bufferVector.size();
662 while (characters < charactersEnd) {
663 // Use strict conversion to detect unpaired surrogates.
664 ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd,
665 &buffer, bufferEnd, true);
666 DCHECK_NE(result, targetExhausted);
667 // Conversion fails when there is an unpaired surrogate. Put
668 // replacement character (U+FFFD) instead of the unpaired
669 // surrogate.
670 if (result != conversionOK) {
671 DCHECK_LE(0xD800, *characters);
672 DCHECK_LE(*characters, 0xDFFF);
673 // There should be room left, since one UChar hasn't been
674 // converted.
675 DCHECK_LE(buffer + 3, bufferEnd);
676 putUTF8Triple(buffer, replacementCharacter);
677 ++characters;
678 }
679 }
680 } else {
681 bool strict = mode == StrictUTF8Conversion;
682 ConversionResult result =
683 convertUTF16ToUTF8(&characters, characters + length, &buffer,
684 buffer + bufferVector.size(), strict);
685 // (length * 3) should be sufficient for any conversion
686 DCHECK_NE(result, targetExhausted);
687
688 // Only produced from strict conversion.
689 if (result == sourceIllegal) {
690 DCHECK(strict);
691 return CString();
692 }
693
694 // Check for an unconverted high surrogate.
695 if (result == sourceExhausted) {
696 if (strict)
697 return CString();
698 // This should be one unpaired high surrogate. Treat it the same
699 // was as an unpaired high surrogate would have been handled in
700 // the middle of a string with non-strict conversion - which is
701 // to say, simply encode it to UTF-8.
702 DCHECK_EQ(characters + 1, this->characters16() + length);
703 DCHECK_GE(*characters, 0xD800);
704 DCHECK_LE(*characters, 0xDBFF);
705 // There should be room left, since one UChar hasn't been
706 // converted.
707 DCHECK_LE(buffer + 3, buffer + bufferVector.size());
708 putUTF8Triple(buffer, *characters);
709 }
710 }
711 }
712
713 return CString(bufferVector.data(), buffer - bufferVector.data());
714 }
715
716 String String::make8BitFrom16BitSource(const UChar* source, size_t length) {
717 if (!length)
718 return emptyString;
719
720 LChar* destination;
721 String result = String::createUninitialized(length, destination);
722
723 copyLCharsFromUCharSource(destination, source, length);
724
725 return result;
726 }
727
728 String String::make16BitFrom8BitSource(const LChar* source, size_t length) {
729 if (!length)
730 return emptyString16Bit;
731
732 UChar* destination;
733 String result = String::createUninitialized(length, destination);
734
735 StringImpl::copyChars(destination, source, length);
736
737 return result;
738 }
739
740 String String::fromUTF8(const LChar* stringStart, size_t length) {
741 RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max());
742
743 if (!stringStart)
744 return String();
745
746 if (!length)
747 return emptyString;
748
749 if (charactersAreAllASCII(stringStart, length))
750 return StringImpl::create(stringStart, length);
751
752 Vector<UChar, 1024> buffer(length);
753 UChar* bufferStart = buffer.data();
754
755 UChar* bufferCurrent = bufferStart;
756 const char* stringCurrent = reinterpret_cast<const char*>(stringStart);
757 if (convertUTF8ToUTF16(
758 &stringCurrent, reinterpret_cast<const char*>(stringStart + length),
759 &bufferCurrent, bufferCurrent + buffer.size()) != conversionOK)
760 return String();
761
762 unsigned utf16Length = bufferCurrent - bufferStart;
763 DCHECK_LT(utf16Length, length);
764 return StringImpl::create(bufferStart, utf16Length);
765 }
766
767 String String::fromUTF8(const LChar* string) {
768 if (!string)
769 return String();
770 return fromUTF8(string, strlen(reinterpret_cast<const char*>(string)));
771 }
772
773 String String::fromUTF8(const CString& s) {
774 return fromUTF8(s.data());
775 }
776
777 String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size) {
778 String utf8 = fromUTF8(string, size);
779 if (!utf8)
780 return String(string, size);
781 return utf8;
782 }
783
784 std::ostream& operator<<(std::ostream& out, const String& string) {
785 if (string.isNull())
786 return out << "<null>";
787
788 out << '"';
789 for (unsigned index = 0; index < string.length(); ++index) {
790 // Print shorthands for select cases.
791 UChar character = string[index];
792 switch (character) {
793 case '\t':
794 out << "\\t";
795 break;
796 case '\n':
797 out << "\\n";
798 break;
799 case '\r':
800 out << "\\r";
801 break;
802 case '"':
803 out << "\\\"";
804 break;
805 case '\\':
806 out << "\\\\";
807 break;
808 default:
809 if (isASCIIPrintable(character)) {
810 out << static_cast<char>(character);
811 } else {
812 // Print "\uXXXX" for control or non-ASCII characters.
813 out << "\\u";
814 out.width(4);
815 out.fill('0');
816 out.setf(std::ios_base::hex, std::ios_base::basefield);
817 out.setf(std::ios::uppercase);
818 out << character;
819 }
820 break;
821 }
822 }
823 return out << '"';
824 }
825
826 #ifndef NDEBUG
827 void String::show() const {
828 dataLogF("%s\n", asciiDebug(impl()).data());
829 }
830 #endif
831
832 } // namespace WTF
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/text/WTFString.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698