Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(280)

Side by Side Diff: Source/WTF/wtf/text/StringImpl.cpp

Issue 14238015: Move Source/WTF/wtf to Source/wtf (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25 #include "config.h"
26 #include "StringImpl.h"
27
28 #include "AtomicString.h"
29 #include "StringBuffer.h"
30 #include "StringHash.h"
31 #include <wtf/ProcessID.h>
32 #include <wtf/StdLibExtras.h>
33 #include <wtf/WTFThreadData.h>
34 #include <wtf/unicode/CharacterNames.h>
35
36 #ifdef STRING_STATS
37 #include <unistd.h>
38 #include <wtf/DataLog.h>
39 #endif
40
41 using namespace std;
42
43 namespace WTF {
44
45 using namespace Unicode;
46
47 COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), String Impl_should_stay_small);
48
49 #ifdef STRING_STATS
50 StringStats StringImpl::m_stringStats;
51
52 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printString StatsFrequency;
53
54 void StringStats::removeString(StringImpl* string)
55 {
56 unsigned length = string->length();
57 bool isSubString = string->isSubString();
58
59 --m_totalNumberStrings;
60
61 if (string->has16BitShadow()) {
62 --m_numberUpconvertedStrings;
63 if (!isSubString)
64 m_totalUpconvertedData -= length;
65 }
66
67 if (string->is8Bit()) {
68 --m_number8BitStrings;
69 if (!isSubString)
70 m_total8BitData -= length;
71 } else {
72 --m_number16BitStrings;
73 if (!isSubString)
74 m_total16BitData -= length;
75 }
76
77 if (!--s_stringRemovesTillPrintStats) {
78 s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
79 printStats();
80 }
81 }
82
83 void StringStats::printStats()
84 {
85 dataLogF("String stats for process id %d:\n", getCurrentProcessID());
86
87 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitDat a;
88 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 1 00) / (double)m_totalNumberStrings : 0.0;
89 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / ( double)m_number8BitStrings : 0.0;
90 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, av erage8bitLength);
91
92 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
93 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
94 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
95
96 double percentUpconverted = m_totalNumberStrings ? ((double)m_numberUpconver tedStrings * 100) / (double)m_number8BitStrings : 0.0;
97 double averageUpconvertedLength = m_numberUpconvertedStrings ? (double)m_tot alUpconvertedData / (double)m_numberUpconvertedStrings : 0.0;
98 dataLogF("%8u (%5.2f%%) upconverted %12llu chars %12llu bytes avg length %6.1f\n", m_numberUpconvertedStrings, percentUpconverted, m_totalUpconvertedData , m_totalUpconvertedData * 2, averageUpconvertedLength);
99
100 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
101 unsigned long long totalDataBytes = m_total8BitData + (m_total16BitData + m_ totalUpconvertedData) * 2;
102 dataLogF("%8u Total %12llu chars %12llu bytes avg length % 6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLen gth);
103 unsigned long long totalSavedBytes = m_total8BitData - m_totalUpconvertedDat a;
104 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;
105 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
106 }
107 #endif
108
109
110 StringImpl::~StringImpl()
111 {
112 ASSERT(!isStatic());
113
114 STRING_STATS_REMOVE_STRING(this);
115
116 if (isAtomic())
117 AtomicString::remove(this);
118
119 BufferOwnership ownership = bufferOwnership();
120
121 if (has16BitShadow()) {
122 ASSERT(m_copyData16);
123 fastFree(m_copyData16);
124 }
125
126 if (ownership == BufferInternal)
127 return;
128 if (ownership == BufferOwned) {
129 // We use m_data8, but since it is a union with m_data16 this works eith er way.
130 ASSERT(m_data8);
131 fastFree(const_cast<LChar*>(m_data8));
132 return;
133 }
134 ASSERT(ownership == BufferSubstring);
135 ASSERT(m_substringBuffer);
136 m_substringBuffer->deref();
137 }
138
139 PassRefPtr<StringImpl> StringImpl::createFromLiteral(const char* characters, uns igned length)
140 {
141 ASSERT_WITH_MESSAGE(length, "Use StringImpl::empty() to create an empty stri ng");
142 ASSERT(charactersAreAllASCII<LChar>(reinterpret_cast<const LChar*>(character s), length));
143 return adoptRef(new StringImpl(characters, length, ConstructFromLiteral));
144 }
145
146 PassRefPtr<StringImpl> StringImpl::createFromLiteral(const char* characters)
147 {
148 size_t length = strlen(characters);
149 ASSERT_WITH_MESSAGE(length, "Use StringImpl::empty() to create an empty stri ng");
150 ASSERT(charactersAreAllASCII<LChar>(reinterpret_cast<const LChar*>(character s), length));
151 return adoptRef(new StringImpl(characters, length, ConstructFromLiteral));
152 }
153
154 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
155 {
156 if (!length) {
157 data = 0;
158 return empty();
159 }
160
161 // Allocate a single buffer large enough to contain the StringImpl
162 // struct as well as the data which it contains. This removes one
163 // heap allocation from this call.
164 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str ingImpl)) / sizeof(LChar)));
165 size_t size = sizeof(StringImpl) + length * sizeof(LChar);
166 StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));
167
168 data = reinterpret_cast<LChar*>(string + 1);
169 return adoptRef(new (NotNull, string) StringImpl(length, Force8BitConstructo r));
170 }
171
172 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
173 {
174 if (!length) {
175 data = 0;
176 return empty();
177 }
178
179 // Allocate a single buffer large enough to contain the StringImpl
180 // struct as well as the data which it contains. This removes one
181 // heap allocation from this call.
182 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str ingImpl)) / sizeof(UChar)));
183 size_t size = sizeof(StringImpl) + length * sizeof(UChar);
184 StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));
185
186 data = reinterpret_cast<UChar*>(string + 1);
187 return adoptRef(new (NotNull, string) StringImpl(length));
188 }
189
190 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalStr ing, unsigned length, LChar*& data)
191 {
192 ASSERT(originalString->is8Bit());
193 ASSERT(originalString->hasOneRef());
194 ASSERT(originalString->bufferOwnership() == BufferInternal);
195
196 if (!length) {
197 data = 0;
198 return empty();
199 }
200
201 // Same as createUninitialized() except here we use fastRealloc.
202 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str ingImpl)) / sizeof(LChar)));
203 size_t size = sizeof(StringImpl) + length * sizeof(LChar);
204 originalString->~StringImpl();
205 StringImpl* string = static_cast<StringImpl*>(fastRealloc(originalString.lea kRef(), size));
206
207 data = reinterpret_cast<LChar*>(string + 1);
208 return adoptRef(new (NotNull, string) StringImpl(length, Force8BitConstructo r));
209 }
210
211 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalStr ing, unsigned length, UChar*& data)
212 {
213 ASSERT(!originalString->is8Bit());
214 ASSERT(originalString->hasOneRef());
215 ASSERT(originalString->bufferOwnership() == BufferInternal);
216
217 if (!length) {
218 data = 0;
219 return empty();
220 }
221
222 // Same as createUninitialized() except here we use fastRealloc.
223 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str ingImpl)) / sizeof(UChar)));
224 size_t size = sizeof(StringImpl) + length * sizeof(UChar);
225 originalString->~StringImpl();
226 StringImpl* string = static_cast<StringImpl*>(fastRealloc(originalString.lea kRef(), size));
227
228 data = reinterpret_cast<UChar*>(string + 1);
229 return adoptRef(new (NotNull, string) StringImpl(length));
230 }
231
232 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned leng th)
233 {
234 if (!characters || !length)
235 return empty();
236
237 UChar* data;
238 RefPtr<StringImpl> string = createUninitialized(length, data);
239 memcpy(data, characters, length * sizeof(UChar));
240 return string.release();
241 }
242
243 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned leng th)
244 {
245 if (!characters || !length)
246 return empty();
247
248 LChar* data;
249 RefPtr<StringImpl> string = createUninitialized(length, data);
250 memcpy(data, characters, length * sizeof(LChar));
251 return string.release();
252 }
253
254 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
255 {
256 if (!characters || !length)
257 return empty();
258
259 LChar* data;
260 RefPtr<StringImpl> string = createUninitialized(length, data);
261
262 for (size_t i = 0; i < length; ++i) {
263 if (characters[i] & 0xff00)
264 return create(characters, length);
265 data[i] = static_cast<LChar>(characters[i]);
266 }
267
268 return string.release();
269 }
270
271 PassRefPtr<StringImpl> StringImpl::create(const LChar* string)
272 {
273 if (!string)
274 return empty();
275 size_t length = strlen(reinterpret_cast<const char*>(string));
276 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
277 return create(string, length);
278 }
279
280 const UChar* StringImpl::getData16SlowCase() const
281 {
282 if (has16BitShadow())
283 return m_copyData16;
284
285 if (bufferOwnership() == BufferSubstring) {
286 // If this is a substring, return a pointer into the parent string.
287 // TODO: Consider severing this string from the parent string
288 unsigned offset = m_data8 - m_substringBuffer->characters8();
289 return m_substringBuffer->characters() + offset;
290 }
291
292 STRING_STATS_ADD_UPCONVERTED_STRING(m_length);
293
294 unsigned len = length();
295 if (hasTerminatingNullCharacter())
296 ++len;
297
298 m_copyData16 = static_cast<UChar*>(fastMalloc(len * sizeof(UChar)));
299
300 m_hashAndFlags |= s_hashFlagHas16BitShadow;
301
302 upconvertCharacters(0, len);
303
304 return m_copyData16;
305 }
306
307 void StringImpl::upconvertCharacters(unsigned start, unsigned end) const
308 {
309 ASSERT(is8Bit());
310 ASSERT(has16BitShadow());
311
312 for (size_t i = start; i < end; ++i)
313 m_copyData16[i] = m_data8[i];
314 }
315
316
317 bool StringImpl::containsOnlyWhitespace()
318 {
319 // FIXME: The definition of whitespace here includes a number of characters
320 // that are not whitespace from the point of view of RenderText; I wonder if
321 // that's a problem in practice.
322 if (is8Bit()) {
323 for (unsigned i = 0; i < m_length; ++i) {
324 UChar c = m_data8[i];
325 if (!isASCIISpace(c))
326 return false;
327 }
328
329 return true;
330 }
331
332 for (unsigned i = 0; i < m_length; ++i) {
333 UChar c = m_data16[i];
334 if (!isASCIISpace(c))
335 return false;
336 }
337 return true;
338 }
339
340 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
341 {
342 if (start >= m_length)
343 return empty();
344 unsigned maxLength = m_length - start;
345 if (length >= maxLength) {
346 if (!start)
347 return this;
348 length = maxLength;
349 }
350 if (is8Bit())
351 return create(m_data8 + start, length);
352
353 return create(m_data16 + start, length);
354 }
355
356 UChar32 StringImpl::characterStartingAt(unsigned i)
357 {
358 if (is8Bit())
359 return m_data8[i];
360 if (U16_IS_SINGLE(m_data16[i]))
361 return m_data16[i];
362 if (i + 1 < m_length && U16_IS_LEAD(m_data16[i]) && U16_IS_TRAIL(m_data16[i + 1]))
363 return U16_GET_SUPPLEMENTARY(m_data16[i], m_data16[i + 1]);
364 return 0;
365 }
366
367 PassRefPtr<StringImpl> StringImpl::lower()
368 {
369 // Note: This is a hot function in the Dromaeo benchmark, specifically the
370 // no-op code path up through the first 'return' statement.
371
372 // First scan the string for uppercase and non-ASCII characters:
373 bool noUpper = true;
374 UChar ored = 0;
375 if (is8Bit()) {
376 const LChar* end = m_data8 + m_length;
377 for (const LChar* chp = m_data8; chp != end; ++chp) {
378 if (UNLIKELY(isASCIIUpper(*chp)))
379 noUpper = false;
380 ored |= *chp;
381 }
382 // Nothing to do if the string is all ASCII with no uppercase.
383 if (noUpper && !(ored & ~0x7F))
384 return this;
385
386 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t> ::max()));
387 int32_t length = m_length;
388
389 LChar* data8;
390 RefPtr<StringImpl> newImpl = createUninitialized(length, data8);
391
392 if (!(ored & ~0x7F)) {
393 for (int32_t i = 0; i < length; ++i)
394 data8[i] = toASCIILower(m_data8[i]);
395
396 return newImpl.release();
397 }
398
399 // Do a slower implementation for cases that include non-ASCII Latin-1 c haracters.
400 for (int32_t i = 0; i < length; ++i)
401 data8[i] = static_cast<LChar>(Unicode::toLower(m_data8[i]));
402
403 return newImpl.release();
404 }
405
406 const UChar *end = m_data16 + m_length;
407 for (const UChar* chp = m_data16; chp != end; ++chp) {
408 if (UNLIKELY(isASCIIUpper(*chp)))
409 noUpper = false;
410 ored |= *chp;
411 }
412 // Nothing to do if the string is all ASCII with no uppercase.
413 if (noUpper && !(ored & ~0x7F))
414 return this;
415
416 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma x()));
417 int32_t length = m_length;
418
419 if (!(ored & ~0x7F)) {
420 UChar* data16;
421 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
422
423 for (int32_t i = 0; i < length; ++i) {
424 UChar c = m_data16[i];
425 data16[i] = toASCIILower(c);
426 }
427 return newImpl.release();
428 }
429
430 // Do a slower implementation for cases that include non-ASCII characters.
431 UChar* data16;
432 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
433
434 bool error;
435 int32_t realLength = Unicode::toLower(data16, length, m_data16, m_length, &e rror);
436 if (!error && realLength == length)
437 return newImpl.release();
438
439 newImpl = createUninitialized(realLength, data16);
440 Unicode::toLower(data16, realLength, m_data16, m_length, &error);
441 if (error)
442 return this;
443 return newImpl.release();
444 }
445
446 PassRefPtr<StringImpl> StringImpl::upper()
447 {
448 // This function could be optimized for no-op cases the way lower() is,
449 // but in empirical testing, few actual calls to upper() are no-ops, so
450 // it wouldn't be worth the extra time for pre-scanning.
451
452 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma x()));
453 int32_t length = m_length;
454
455 if (is8Bit()) {
456 LChar* data8;
457 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
458
459 // Do a faster loop for the case where all the characters are ASCII.
460 LChar ored = 0;
461 for (int i = 0; i < length; ++i) {
462 LChar c = m_data8[i];
463 ored |= c;
464 data8[i] = toASCIIUpper(c);
465 }
466 if (!(ored & ~0x7F))
467 return newImpl.release();
468
469 // Do a slower implementation for cases that include non-ASCII Latin-1 c haracters.
470 int numberSharpSCharacters = 0;
471
472 // There are two special cases.
473 // 1. latin-1 characters when converted to upper case are 16 bit charac ters.
474 // 2. Lower case sharp-S converts to "SS" (two characters)
475 for (int32_t i = 0; i < length; ++i) {
476 LChar c = m_data8[i];
477 if (UNLIKELY(c == smallLetterSharpS))
478 ++numberSharpSCharacters;
479 UChar upper = Unicode::toUpper(c);
480 if (UNLIKELY(upper > 0xff)) {
481 // Since this upper-cased character does not fit in an 8-bit str ing, we need to take the 16-bit path.
482 goto upconvert;
483 }
484 data8[i] = static_cast<LChar>(upper);
485 }
486
487 if (!numberSharpSCharacters)
488 return newImpl.release();
489
490 // We have numberSSCharacters sharp-s characters, but none of the other special characters.
491 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
492
493 LChar* dest = data8;
494
495 for (int32_t i = 0; i < length; ++i) {
496 LChar c = m_data8[i];
497 if (c == smallLetterSharpS) {
498 *dest++ = 'S';
499 *dest++ = 'S';
500 } else
501 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
502 }
503
504 return newImpl.release();
505 }
506
507 upconvert:
508 const UChar* source16 = characters();
509
510 UChar* data16;
511 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
512
513 // Do a faster loop for the case where all the characters are ASCII.
514 UChar ored = 0;
515 for (int i = 0; i < length; ++i) {
516 UChar c = source16[i];
517 ored |= c;
518 data16[i] = toASCIIUpper(c);
519 }
520 if (!(ored & ~0x7F))
521 return newImpl.release();
522
523 // Do a slower implementation for cases that include non-ASCII characters.
524 bool error;
525 newImpl = createUninitialized(m_length, data16);
526 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &e rror);
527 if (!error && realLength == length)
528 return newImpl;
529 newImpl = createUninitialized(realLength, data16);
530 Unicode::toUpper(data16, realLength, source16, m_length, &error);
531 if (error)
532 return this;
533 return newImpl.release();
534 }
535
536 PassRefPtr<StringImpl> StringImpl::fill(UChar character)
537 {
538 if (!m_length)
539 return this;
540
541 if (!(character & ~0x7F)) {
542 LChar* data;
543 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
544 for (unsigned i = 0; i < m_length; ++i)
545 data[i] = character;
546 return newImpl.release();
547 }
548 UChar* data;
549 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
550 for (unsigned i = 0; i < m_length; ++i)
551 data[i] = character;
552 return newImpl.release();
553 }
554
555 PassRefPtr<StringImpl> StringImpl::foldCase()
556 {
557 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma x()));
558 int32_t length = m_length;
559
560 if (is8Bit()) {
561 // Do a faster loop for the case where all the characters are ASCII.
562 LChar* data;
563 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data);
564 LChar ored = 0;
565
566 for (int32_t i = 0; i < length; ++i) {
567 LChar c = m_data8[i];
568 data[i] = toASCIILower(c);
569 ored |= c;
570 }
571
572 if (!(ored & ~0x7F))
573 return newImpl.release();
574
575 // Do a slower implementation for cases that include non-ASCII Latin-1 c haracters.
576 for (int32_t i = 0; i < length; ++i)
577 data[i] = static_cast<LChar>(Unicode::toLower(m_data8[i]));
578
579 return newImpl.release();
580 }
581
582 // Do a faster loop for the case where all the characters are ASCII.
583 UChar* data;
584 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
585 UChar ored = 0;
586 for (int32_t i = 0; i < length; ++i) {
587 UChar c = m_data16[i];
588 ored |= c;
589 data[i] = toASCIILower(c);
590 }
591 if (!(ored & ~0x7F))
592 return newImpl.release();
593
594 // Do a slower implementation for cases that include non-ASCII characters.
595 bool error;
596 int32_t realLength = Unicode::foldCase(data, length, m_data16, m_length, &er ror);
597 if (!error && realLength == length)
598 return newImpl.release();
599 newImpl = createUninitialized(realLength, data);
600 Unicode::foldCase(data, realLength, m_data16, m_length, &error);
601 if (error)
602 return this;
603 return newImpl.release();
604 }
605
606 template <class UCharPredicate>
607 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
608 {
609 if (!m_length)
610 return empty();
611
612 unsigned start = 0;
613 unsigned end = m_length - 1;
614
615 // skip white space from start
616 while (start <= end && predicate(is8Bit() ? m_data8[start] : m_data16[start] ))
617 ++start;
618
619 // only white space
620 if (start > end)
621 return empty();
622
623 // skip white space from end
624 while (end && predicate(is8Bit() ? m_data8[end] : m_data16[end]))
625 --end;
626
627 if (!start && end == m_length - 1)
628 return this;
629 if (is8Bit())
630 return create(m_data8 + start, end + 1 - start);
631 return create(m_data16 + start, end + 1 - start);
632 }
633
634 class UCharPredicate {
635 public:
636 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(functi on) { }
637
638 inline bool operator()(UChar ch) const
639 {
640 return m_function(ch);
641 }
642
643 private:
644 const CharacterMatchFunctionPtr m_function;
645 };
646
647 class SpaceOrNewlinePredicate {
648 public:
649 inline bool operator()(UChar ch) const
650 {
651 return isSpaceOrNewline(ch);
652 }
653 };
654
655 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
656 {
657 return stripMatchedCharacters(SpaceOrNewlinePredicate());
658 }
659
660 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhi teSpace)
661 {
662 return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
663 }
664
665 template <typename CharType>
666 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType * characters, CharacterMatchFunctionPtr findMatch)
667 {
668 const CharType* from = characters;
669 const CharType* fromend = from + m_length;
670
671 // Assume the common case will not remove any characters
672 while (from != fromend && !findMatch(*from))
673 ++from;
674 if (from == fromend)
675 return this;
676
677 StringBuffer<CharType> data(m_length);
678 CharType* to = data.characters();
679 unsigned outc = from - characters;
680
681 if (outc)
682 memcpy(to, characters, outc * sizeof(CharType));
683
684 while (true) {
685 while (from != fromend && findMatch(*from))
686 ++from;
687 while (from != fromend && !findMatch(*from))
688 to[outc++] = *from++;
689 if (from == fromend)
690 break;
691 }
692
693 data.shrink(outc);
694
695 return adopt(data);
696 }
697
698 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr fi ndMatch)
699 {
700 if (is8Bit())
701 return removeCharacters(characters8(), findMatch);
702 return removeCharacters(characters16(), findMatch);
703 }
704
705 template <typename CharType, class UCharPredicate>
706 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UChar Predicate predicate)
707 {
708 StringBuffer<CharType> data(m_length);
709
710 const CharType* from = getCharacters<CharType>();
711 const CharType* fromend = from + m_length;
712 int outc = 0;
713 bool changedToSpace = false;
714
715 CharType* to = data.characters();
716
717 while (true) {
718 while (from != fromend && predicate(*from)) {
719 if (*from != ' ')
720 changedToSpace = true;
721 ++from;
722 }
723 while (from != fromend && !predicate(*from))
724 to[outc++] = *from++;
725 if (from != fromend)
726 to[outc++] = ' ';
727 else
728 break;
729 }
730
731 if (outc > 0 && to[outc - 1] == ' ')
732 --outc;
733
734 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
735 return this;
736
737 data.shrink(outc);
738
739 return adopt(data);
740 }
741
742 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
743 {
744 if (is8Bit())
745 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlin ePredicate());
746 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePre dicate());
747 }
748
749 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr is WhiteSpace)
750 {
751 if (is8Bit())
752 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicat e(isWhiteSpace));
753 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(is WhiteSpace));
754 }
755
756 int StringImpl::toIntStrict(bool* ok, int base)
757 {
758 if (is8Bit())
759 return charactersToIntStrict(characters8(), m_length, ok, base);
760 return charactersToIntStrict(characters16(), m_length, ok, base);
761 }
762
763 unsigned StringImpl::toUIntStrict(bool* ok, int base)
764 {
765 if (is8Bit())
766 return charactersToUIntStrict(characters8(), m_length, ok, base);
767 return charactersToUIntStrict(characters16(), m_length, ok, base);
768 }
769
770 int64_t StringImpl::toInt64Strict(bool* ok, int base)
771 {
772 if (is8Bit())
773 return charactersToInt64Strict(characters8(), m_length, ok, base);
774 return charactersToInt64Strict(characters16(), m_length, ok, base);
775 }
776
777 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
778 {
779 if (is8Bit())
780 return charactersToUInt64Strict(characters8(), m_length, ok, base);
781 return charactersToUInt64Strict(characters16(), m_length, ok, base);
782 }
783
784 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
785 {
786 if (is8Bit())
787 return charactersToIntPtrStrict(characters8(), m_length, ok, base);
788 return charactersToIntPtrStrict(characters16(), m_length, ok, base);
789 }
790
791 int StringImpl::toInt(bool* ok)
792 {
793 if (is8Bit())
794 return charactersToInt(characters8(), m_length, ok);
795 return charactersToInt(characters16(), m_length, ok);
796 }
797
798 unsigned StringImpl::toUInt(bool* ok)
799 {
800 if (is8Bit())
801 return charactersToUInt(characters8(), m_length, ok);
802 return charactersToUInt(characters16(), m_length, ok);
803 }
804
805 int64_t StringImpl::toInt64(bool* ok)
806 {
807 if (is8Bit())
808 return charactersToInt64(characters8(), m_length, ok);
809 return charactersToInt64(characters16(), m_length, ok);
810 }
811
812 uint64_t StringImpl::toUInt64(bool* ok)
813 {
814 if (is8Bit())
815 return charactersToUInt64(characters8(), m_length, ok);
816 return charactersToUInt64(characters16(), m_length, ok);
817 }
818
819 intptr_t StringImpl::toIntPtr(bool* ok)
820 {
821 if (is8Bit())
822 return charactersToIntPtr(characters8(), m_length, ok);
823 return charactersToIntPtr(characters16(), m_length, ok);
824 }
825
826 double StringImpl::toDouble(bool* ok)
827 {
828 if (is8Bit())
829 return charactersToDouble(characters8(), m_length, ok);
830 return charactersToDouble(characters16(), m_length, ok);
831 }
832
833 float StringImpl::toFloat(bool* ok)
834 {
835 if (is8Bit())
836 return charactersToFloat(characters8(), m_length, ok);
837 return charactersToFloat(characters16(), m_length, ok);
838 }
839
840 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
841 {
842 while (length--) {
843 LChar bc = *b++;
844 if (foldCase(*a++) != foldCase(bc))
845 return false;
846 }
847 return true;
848 }
849
850 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
851 {
852 while (length--) {
853 LChar bc = *b++;
854 if (foldCase(*a++) != foldCase(bc))
855 return false;
856 }
857 return true;
858 }
859
860 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
861 {
862 if (is8Bit())
863 return WTF::find(characters8(), m_length, matchFunction, start);
864 return WTF::find(characters16(), m_length, matchFunction, start);
865 }
866
867 size_t StringImpl::find(const LChar* matchString, unsigned index)
868 {
869 // Check for null or empty string to match against
870 if (!matchString)
871 return notFound;
872 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString) );
873 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
874 unsigned matchLength = matchStringLength;
875 if (!matchLength)
876 return min(index, length());
877
878 // Optimization 1: fast case for strings of length 1.
879 if (matchLength == 1)
880 return WTF::find(characters16(), length(), *matchString, index);
881
882 // Check index & matchLength are in range.
883 if (index > length())
884 return notFound;
885 unsigned searchLength = length() - index;
886 if (matchLength > searchLength)
887 return notFound;
888 // delta is the number of additional times to test; delta == 0 means test on ly once.
889 unsigned delta = searchLength - matchLength;
890
891 const UChar* searchCharacters = characters() + index;
892
893 // Optimization 2: keep a running hash of the strings,
894 // only call equal if the hashes match.
895 unsigned searchHash = 0;
896 unsigned matchHash = 0;
897 for (unsigned i = 0; i < matchLength; ++i) {
898 searchHash += searchCharacters[i];
899 matchHash += matchString[i];
900 }
901
902 unsigned i = 0;
903 // keep looping until we match
904 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
905 if (i == delta)
906 return notFound;
907 searchHash += searchCharacters[i + matchLength];
908 searchHash -= searchCharacters[i];
909 ++i;
910 }
911 return index + i;
912 }
913
914 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
915 {
916 // Check for null or empty string to match against
917 if (!matchString)
918 return notFound;
919 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString) );
920 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
921 unsigned matchLength = matchStringLength;
922 if (!matchLength)
923 return min(index, length());
924
925 // Check index & matchLength are in range.
926 if (index > length())
927 return notFound;
928 unsigned searchLength = length() - index;
929 if (matchLength > searchLength)
930 return notFound;
931 // delta is the number of additional times to test; delta == 0 means test on ly once.
932 unsigned delta = searchLength - matchLength;
933
934 const UChar* searchCharacters = characters() + index;
935
936 unsigned i = 0;
937 // keep looping until we match
938 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
939 if (i == delta)
940 return notFound;
941 ++i;
942 }
943 return index + i;
944 }
945
946 template <typename SearchCharacterType, typename MatchCharacterType>
947 ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacter s, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLen gth, unsigned matchLength)
948 {
949 // Optimization: keep a running hash of the strings,
950 // only call equal() if the hashes match.
951
952 // delta is the number of additional times to test; delta == 0 means test on ly once.
953 unsigned delta = searchLength - matchLength;
954
955 unsigned searchHash = 0;
956 unsigned matchHash = 0;
957
958 for (unsigned i = 0; i < matchLength; ++i) {
959 searchHash += searchCharacters[i];
960 matchHash += matchCharacters[i];
961 }
962
963 unsigned i = 0;
964 // keep looping until we match
965 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacte rs, matchLength)) {
966 if (i == delta)
967 return notFound;
968 searchHash += searchCharacters[i + matchLength];
969 searchHash -= searchCharacters[i];
970 ++i;
971 }
972 return index + i;
973 }
974
975 size_t StringImpl::find(StringImpl* matchString)
976 {
977 // Check for null string to match against
978 if (UNLIKELY(!matchString))
979 return notFound;
980 unsigned matchLength = matchString->length();
981
982 // Optimization 1: fast case for strings of length 1.
983 if (matchLength == 1) {
984 if (is8Bit()) {
985 if (matchString->is8Bit())
986 return WTF::find(characters8(), length(), matchString->character s8()[0]);
987 return WTF::find(characters8(), length(), matchString->characters16( )[0]);
988 }
989 if (matchString->is8Bit())
990 return WTF::find(characters16(), length(), matchString->characters8( )[0]);
991 return WTF::find(characters16(), length(), matchString->characters16()[0 ]);
992 }
993
994 // Check matchLength is in range.
995 if (matchLength > length())
996 return notFound;
997
998 // Check for empty string to match against
999 if (UNLIKELY(!matchLength))
1000 return 0;
1001
1002 if (is8Bit()) {
1003 if (matchString->is8Bit())
1004 return findInner(characters8(), matchString->characters8(), 0, lengt h(), matchLength);
1005 return findInner(characters8(), matchString->characters16(), 0, length() , matchLength);
1006 }
1007
1008 if (matchString->is8Bit())
1009 return findInner(characters16(), matchString->characters8(), 0, length() , matchLength);
1010
1011 return findInner(characters16(), matchString->characters16(), 0, length(), m atchLength);
1012 }
1013
1014 size_t StringImpl::find(StringImpl* matchString, unsigned index)
1015 {
1016 // Check for null or empty string to match against
1017 if (UNLIKELY(!matchString))
1018 return notFound;
1019
1020 unsigned matchLength = matchString->length();
1021
1022 // Optimization 1: fast case for strings of length 1.
1023 if (matchLength == 1) {
1024 if (is8Bit())
1025 return WTF::find(characters8(), length(), (*matchString)[0], index);
1026 return WTF::find(characters16(), length(), (*matchString)[0], index);
1027 }
1028
1029 if (UNLIKELY(!matchLength))
1030 return min(index, length());
1031
1032 // Check index & matchLength are in range.
1033 if (index > length())
1034 return notFound;
1035 unsigned searchLength = length() - index;
1036 if (matchLength > searchLength)
1037 return notFound;
1038
1039 if (is8Bit()) {
1040 if (matchString->is8Bit())
1041 return findInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1042 return findInner(characters8() + index, matchString->characters16(), ind ex, searchLength, matchLength);
1043 }
1044
1045 if (matchString->is8Bit())
1046 return findInner(characters16() + index, matchString->characters8(), ind ex, searchLength, matchLength);
1047
1048 return findInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1049 }
1050
1051 template <typename SearchCharacterType, typename MatchCharacterType>
1052 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* sea rchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsign ed searchLength, unsigned matchLength)
1053 {
1054 // delta is the number of additional times to test; delta == 0 means test on ly once.
1055 unsigned delta = searchLength - matchLength;
1056
1057 unsigned i = 0;
1058 // keep looping until we match
1059 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength )) {
1060 if (i == delta)
1061 return notFound;
1062 ++i;
1063 }
1064 return index + i;
1065 }
1066
1067 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
1068 {
1069 // Check for null or empty string to match against
1070 if (!matchString)
1071 return notFound;
1072 unsigned matchLength = matchString->length();
1073 if (!matchLength)
1074 return min(index, length());
1075
1076 // Check index & matchLength are in range.
1077 if (index > length())
1078 return notFound;
1079 unsigned searchLength = length() - index;
1080 if (matchLength > searchLength)
1081 return notFound;
1082
1083 if (is8Bit()) {
1084 if (matchString->is8Bit())
1085 return findIgnoringCaseInner(characters8() + index, matchString->cha racters8(), index, searchLength, matchLength);
1086 return findIgnoringCaseInner(characters8() + index, matchString->charact ers16(), index, searchLength, matchLength);
1087 }
1088
1089 if (matchString->is8Bit())
1090 return findIgnoringCaseInner(characters16() + index, matchString->charac ters8(), index, searchLength, matchLength);
1091
1092 return findIgnoringCaseInner(characters16() + index, matchString->characters 16(), index, searchLength, matchLength);
1093 }
1094
1095 size_t StringImpl::findNextLineStart(unsigned index)
1096 {
1097 if (is8Bit())
1098 return WTF::findNextLineStart(characters8(), m_length, index);
1099 return WTF::findNextLineStart(characters16(), m_length, index);
1100 }
1101
1102 size_t StringImpl::reverseFind(UChar c, unsigned index)
1103 {
1104 if (is8Bit())
1105 return WTF::reverseFind(characters8(), m_length, c, index);
1106 return WTF::reverseFind(characters16(), m_length, c, index);
1107 }
1108
1109 template <typename SearchCharacterType, typename MatchCharacterType>
1110 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCh aracters, const MatchCharacterType* matchCharacters, unsigned index, unsigned le ngth, unsigned matchLength)
1111 {
1112 // Optimization: keep a running hash of the strings,
1113 // only call equal if the hashes match.
1114
1115 // delta is the number of additional times to test; delta == 0 means test on ly once.
1116 unsigned delta = min(index, length - matchLength);
1117
1118 unsigned searchHash = 0;
1119 unsigned matchHash = 0;
1120 for (unsigned i = 0; i < matchLength; ++i) {
1121 searchHash += searchCharacters[delta + i];
1122 matchHash += matchCharacters[i];
1123 }
1124
1125 // keep looping until we match
1126 while (searchHash != matchHash || !equal(searchCharacters + delta, matchChar acters, matchLength)) {
1127 if (!delta)
1128 return notFound;
1129 --delta;
1130 searchHash -= searchCharacters[delta + matchLength];
1131 searchHash += searchCharacters[delta];
1132 }
1133 return delta;
1134 }
1135
1136 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
1137 {
1138 // Check for null or empty string to match against
1139 if (!matchString)
1140 return notFound;
1141 unsigned matchLength = matchString->length();
1142 unsigned ourLength = length();
1143 if (!matchLength)
1144 return min(index, ourLength);
1145
1146 // Optimization 1: fast case for strings of length 1.
1147 if (matchLength == 1) {
1148 if (is8Bit())
1149 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
1150 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], in dex);
1151 }
1152
1153 // Check index & matchLength are in range.
1154 if (matchLength > ourLength)
1155 return notFound;
1156
1157 if (is8Bit()) {
1158 if (matchString->is8Bit())
1159 return reverseFindInner(characters8(), matchString->characters8(), i ndex, ourLength, matchLength);
1160 return reverseFindInner(characters8(), matchString->characters16(), inde x, ourLength, matchLength);
1161 }
1162
1163 if (matchString->is8Bit())
1164 return reverseFindInner(characters16(), matchString->characters8(), inde x, ourLength, matchLength);
1165
1166 return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1167 }
1168
1169 template <typename SearchCharacterType, typename MatchCharacterType>
1170 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterTy pe* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1171 {
1172 // delta is the number of additional times to test; delta == 0 means test on ly once.
1173 unsigned delta = min(index, length - matchLength);
1174
1175 // keep looping until we match
1176 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLe ngth)) {
1177 if (!delta)
1178 return notFound;
1179 --delta;
1180 }
1181 return delta;
1182 }
1183
1184 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned ind ex)
1185 {
1186 // Check for null or empty string to match against
1187 if (!matchString)
1188 return notFound;
1189 unsigned matchLength = matchString->length();
1190 unsigned ourLength = length();
1191 if (!matchLength)
1192 return min(index, ourLength);
1193
1194 // Check index & matchLength are in range.
1195 if (matchLength > ourLength)
1196 return notFound;
1197
1198 if (is8Bit()) {
1199 if (matchString->is8Bit())
1200 return reverseFindIgnoringCaseInner(characters8(), matchString->char acters8(), index, ourLength, matchLength);
1201 return reverseFindIgnoringCaseInner(characters8(), matchString->characte rs16(), index, ourLength, matchLength);
1202 }
1203
1204 if (matchString->is8Bit())
1205 return reverseFindIgnoringCaseInner(characters16(), matchString->charact ers8(), index, ourLength, matchLength);
1206
1207 return reverseFindIgnoringCaseInner(characters16(), matchString->characters1 6(), index, ourLength, matchLength);
1208 }
1209
1210 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned star tOffset, const char* matchString, unsigned matchLength, bool caseSensitive)
1211 {
1212 ASSERT(stringImpl);
1213 ASSERT(matchLength <= stringImpl->length());
1214 ASSERT(startOffset + matchLength <= stringImpl->length());
1215
1216 if (caseSensitive) {
1217 if (stringImpl->is8Bit())
1218 return equal(stringImpl->characters8() + startOffset, reinterpret_ca st<const LChar*>(matchString), matchLength);
1219 return equal(stringImpl->characters16() + startOffset, reinterpret_cast< const LChar*>(matchString), matchLength);
1220 }
1221 if (stringImpl->is8Bit())
1222 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinte rpret_cast<const LChar*>(matchString), matchLength);
1223 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpr et_cast<const LChar*>(matchString), matchLength);
1224 }
1225
1226 bool StringImpl::startsWith(UChar character) const
1227 {
1228 return m_length && (*this)[0] == character;
1229 }
1230
1231 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1232 {
1233 ASSERT(matchLength);
1234 if (matchLength > length())
1235 return false;
1236 return equalInner(this, 0, matchString, matchLength, caseSensitive);
1237 }
1238
1239 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
1240 {
1241 ASSERT(matchString);
1242 if (m_length >= matchString->m_length) {
1243 unsigned start = m_length - matchString->m_length;
1244 return (caseSensitive ? find(matchString, start) : findIgnoringCase(matc hString, start)) == start;
1245 }
1246 return false;
1247 }
1248
1249 bool StringImpl::endsWith(UChar character) const
1250 {
1251 return m_length && (*this)[m_length - 1] == character;
1252 }
1253
1254 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool ca seSensitive) const
1255 {
1256 ASSERT(matchLength);
1257 if (matchLength > length())
1258 return false;
1259 unsigned startOffset = length() - matchLength;
1260 return equalInner(this, startOffset, matchString, matchLength, caseSensitive );
1261 }
1262
1263 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
1264 {
1265 if (oldC == newC)
1266 return this;
1267 unsigned i;
1268 for (i = 0; i != m_length; ++i) {
1269 UChar c = is8Bit() ? m_data8[i] : m_data16[i];
1270 if (c == oldC)
1271 break;
1272 }
1273 if (i == m_length)
1274 return this;
1275
1276 if (is8Bit()) {
1277 if (oldC > 0xff)
1278 // Looking for a 16 bit char in an 8 bit string, we're done.
1279 return this;
1280
1281 if (newC <= 0xff) {
1282 LChar* data;
1283 LChar oldChar = static_cast<LChar>(oldC);
1284 LChar newChar = static_cast<LChar>(newC);
1285
1286 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1287
1288 for (i = 0; i != m_length; ++i) {
1289 LChar ch = m_data8[i];
1290 if (ch == oldChar)
1291 ch = newChar;
1292 data[i] = ch;
1293 }
1294 return newImpl.release();
1295 }
1296
1297 // There is the possibility we need to up convert from 8 to 16 bit,
1298 // create a 16 bit string for the result.
1299 UChar* data;
1300 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1301
1302 for (i = 0; i != m_length; ++i) {
1303 UChar ch = m_data8[i];
1304 if (ch == oldC)
1305 ch = newC;
1306 data[i] = ch;
1307 }
1308
1309 return newImpl.release();
1310 }
1311
1312 UChar* data;
1313 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1314
1315 for (i = 0; i != m_length; ++i) {
1316 UChar ch = m_data16[i];
1317 if (ch == oldC)
1318 ch = newC;
1319 data[i] = ch;
1320 }
1321 return newImpl.release();
1322 }
1323
1324 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToR eplace, StringImpl* str)
1325 {
1326 position = min(position, length());
1327 lengthToReplace = min(lengthToReplace, length() - position);
1328 unsigned lengthToInsert = str ? str->length() : 0;
1329 if (!lengthToReplace && !lengthToInsert)
1330 return this;
1331
1332 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max () - lengthToInsert));
1333
1334 if (is8Bit() && (!str || str->is8Bit())) {
1335 LChar* data;
1336 RefPtr<StringImpl> newImpl =
1337 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1338 memcpy(data, m_data8, position * sizeof(LChar));
1339 if (str)
1340 memcpy(data + position, str->m_data8, lengthToInsert * sizeof(LChar) );
1341 memcpy(data + position + lengthToInsert, m_data8 + position + lengthToRe place,
1342 (length() - position - lengthToReplace) * sizeof(LChar));
1343 return newImpl.release();
1344 }
1345 UChar* data;
1346 RefPtr<StringImpl> newImpl =
1347 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1348 if (is8Bit())
1349 for (unsigned i = 0; i < position; ++i)
1350 data[i] = m_data8[i];
1351 else
1352 memcpy(data, m_data16, position * sizeof(UChar));
1353 if (str) {
1354 if (str->is8Bit())
1355 for (unsigned i = 0; i < lengthToInsert; ++i)
1356 data[i + position] = str->m_data8[i];
1357 else
1358 memcpy(data + position, str->m_data16, lengthToInsert * sizeof(UChar ));
1359 }
1360 if (is8Bit()) {
1361 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
1362 data[i + position + lengthToInsert] = m_data8[i + position + lengthT oReplace];
1363 } else {
1364 memcpy(data + position + lengthToInsert, characters() + position + lengt hToReplace,
1365 (length() - position - lengthToReplace) * sizeof(UChar));
1366 }
1367 return newImpl.release();
1368 }
1369
1370 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen t)
1371 {
1372 if (!replacement)
1373 return this;
1374
1375 if (replacement->is8Bit())
1376 return replace(pattern, replacement->m_data8, replacement->length());
1377
1378 return replace(pattern, replacement->m_data16, replacement->length());
1379 }
1380
1381 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replaceme nt, unsigned repStrLength)
1382 {
1383 ASSERT(replacement);
1384
1385 size_t srcSegmentStart = 0;
1386 unsigned matchCount = 0;
1387
1388 // Count the matches.
1389 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
1390 ++matchCount;
1391 ++srcSegmentStart;
1392 }
1393
1394 // If we have 0 matches then we don't have to do any more work.
1395 if (!matchCount)
1396 return this;
1397
1398 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max( ) / repStrLength);
1399
1400 unsigned replaceSize = matchCount * repStrLength;
1401 unsigned newSize = m_length - matchCount;
1402 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1403
1404 newSize += replaceSize;
1405
1406 // Construct the new data.
1407 size_t srcSegmentEnd;
1408 unsigned srcSegmentLength;
1409 srcSegmentStart = 0;
1410 unsigned dstOffset = 0;
1411
1412 if (is8Bit()) {
1413 LChar* data;
1414 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1415
1416 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
1417 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1418 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1419 dstOffset += srcSegmentLength;
1420 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
1421 dstOffset += repStrLength;
1422 srcSegmentStart = srcSegmentEnd + 1;
1423 }
1424
1425 srcSegmentLength = m_length - srcSegmentStart;
1426 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * s izeof(LChar));
1427
1428 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1429
1430 return newImpl.release();
1431 }
1432
1433 UChar* data;
1434 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1435
1436 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
1437 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1438 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1439
1440 dstOffset += srcSegmentLength;
1441 for (unsigned i = 0; i < repStrLength; ++i)
1442 data[i + dstOffset] = replacement[i];
1443
1444 dstOffset += repStrLength;
1445 srcSegmentStart = srcSegmentEnd + 1;
1446 }
1447
1448 srcSegmentLength = m_length - srcSegmentStart;
1449 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * size of(UChar));
1450
1451 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1452
1453 return newImpl.release();
1454 }
1455
1456 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replaceme nt, unsigned repStrLength)
1457 {
1458 ASSERT(replacement);
1459
1460 size_t srcSegmentStart = 0;
1461 unsigned matchCount = 0;
1462
1463 // Count the matches.
1464 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
1465 ++matchCount;
1466 ++srcSegmentStart;
1467 }
1468
1469 // If we have 0 matches then we don't have to do any more work.
1470 if (!matchCount)
1471 return this;
1472
1473 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max( ) / repStrLength);
1474
1475 unsigned replaceSize = matchCount * repStrLength;
1476 unsigned newSize = m_length - matchCount;
1477 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1478
1479 newSize += replaceSize;
1480
1481 // Construct the new data.
1482 size_t srcSegmentEnd;
1483 unsigned srcSegmentLength;
1484 srcSegmentStart = 0;
1485 unsigned dstOffset = 0;
1486
1487 if (is8Bit()) {
1488 UChar* data;
1489 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1490
1491 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
1492 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1493 for (unsigned i = 0; i < srcSegmentLength; ++i)
1494 data[i + dstOffset] = m_data8[i + srcSegmentStart];
1495
1496 dstOffset += srcSegmentLength;
1497 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1498
1499 dstOffset += repStrLength;
1500 srcSegmentStart = srcSegmentEnd + 1;
1501 }
1502
1503 srcSegmentLength = m_length - srcSegmentStart;
1504 for (unsigned i = 0; i < srcSegmentLength; ++i)
1505 data[i + dstOffset] = m_data8[i + srcSegmentStart];
1506
1507 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1508
1509 return newImpl.release();
1510 }
1511
1512 UChar* data;
1513 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1514
1515 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
1516 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1517 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1518
1519 dstOffset += srcSegmentLength;
1520 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1521
1522 dstOffset += repStrLength;
1523 srcSegmentStart = srcSegmentEnd + 1;
1524 }
1525
1526 srcSegmentLength = m_length - srcSegmentStart;
1527 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * size of(UChar));
1528
1529 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1530
1531 return newImpl.release();
1532 }
1533
1534 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl acement)
1535 {
1536 if (!pattern || !replacement)
1537 return this;
1538
1539 unsigned patternLength = pattern->length();
1540 if (!patternLength)
1541 return this;
1542
1543 unsigned repStrLength = replacement->length();
1544 size_t srcSegmentStart = 0;
1545 unsigned matchCount = 0;
1546
1547 // Count the matches.
1548 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
1549 ++matchCount;
1550 srcSegmentStart += patternLength;
1551 }
1552
1553 // If we have 0 matches, we don't have to do any more work
1554 if (!matchCount)
1555 return this;
1556
1557 unsigned newSize = m_length - matchCount * patternLength;
1558 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max( ) / repStrLength);
1559
1560 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * re pStrLength));
1561
1562 newSize += matchCount * repStrLength;
1563
1564
1565 // Construct the new data
1566 size_t srcSegmentEnd;
1567 unsigned srcSegmentLength;
1568 srcSegmentStart = 0;
1569 unsigned dstOffset = 0;
1570 bool srcIs8Bit = is8Bit();
1571 bool replacementIs8Bit = replacement->is8Bit();
1572
1573 // There are 4 cases:
1574 // 1. This and replacement are both 8 bit.
1575 // 2. This and replacement are both 16 bit.
1576 // 3. This is 8 bit and replacement is 16 bit.
1577 // 4. This is 16 bit and replacement is 8 bit.
1578 if (srcIs8Bit && replacementIs8Bit) {
1579 // Case 1
1580 LChar* data;
1581 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1582 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
1583 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1584 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1585 dstOffset += srcSegmentLength;
1586 memcpy(data + dstOffset, replacement->m_data8, repStrLength * sizeof (LChar));
1587 dstOffset += repStrLength;
1588 srcSegmentStart = srcSegmentEnd + patternLength;
1589 }
1590
1591 srcSegmentLength = m_length - srcSegmentStart;
1592 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * s izeof(LChar));
1593
1594 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1595
1596 return newImpl.release();
1597 }
1598
1599 UChar* data;
1600 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1601 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
1602 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1603 if (srcIs8Bit) {
1604 // Case 3.
1605 for (unsigned i = 0; i < srcSegmentLength; ++i)
1606 data[i + dstOffset] = m_data8[i + srcSegmentStart];
1607 } else {
1608 // Case 2 & 4.
1609 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLengt h * sizeof(UChar));
1610 }
1611 dstOffset += srcSegmentLength;
1612 if (replacementIs8Bit) {
1613 // Cases 2 & 3.
1614 for (unsigned i = 0; i < repStrLength; ++i)
1615 data[i + dstOffset] = replacement->m_data8[i];
1616 } else {
1617 // Case 4
1618 memcpy(data + dstOffset, replacement->m_data16, repStrLength * sizeo f(UChar));
1619 }
1620 dstOffset += repStrLength;
1621 srcSegmentStart = srcSegmentEnd + patternLength;
1622 }
1623
1624 srcSegmentLength = m_length - srcSegmentStart;
1625 if (srcIs8Bit) {
1626 // Case 3.
1627 for (unsigned i = 0; i < srcSegmentLength; ++i)
1628 data[i + dstOffset] = m_data8[i + srcSegmentStart];
1629 } else {
1630 // Cases 2 & 4.
1631 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1632 }
1633
1634 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1635
1636 return newImpl.release();
1637 }
1638
1639 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
1640 {
1641 unsigned aLength = a->length();
1642 unsigned bLength = b->length();
1643 if (aLength != bLength)
1644 return false;
1645
1646 if (a->is8Bit()) {
1647 if (b->is8Bit())
1648 return equal(a->characters8(), b->characters8(), aLength);
1649
1650 return equal(a->characters8(), b->characters16(), aLength);
1651 }
1652
1653 if (b->is8Bit())
1654 return equal(a->characters16(), b->characters8(), aLength);
1655
1656 return equal(a->characters16(), b->characters16(), aLength);
1657 }
1658
1659 bool equal(const StringImpl* a, const StringImpl* b)
1660 {
1661 if (a == b)
1662 return true;
1663 if (!a || !b)
1664 return false;
1665
1666 return stringImplContentEqual(a, b);
1667 }
1668
1669 bool equal(const StringImpl* a, const LChar* b, unsigned length)
1670 {
1671 if (!a)
1672 return !b;
1673 if (!b)
1674 return !a;
1675
1676 if (length != a->length())
1677 return false;
1678
1679 if (a->is8Bit())
1680 return equal(a->characters8(), b, length);
1681 return equal(a->characters16(), b, length);
1682 }
1683
1684 bool equal(const StringImpl* a, const LChar* b)
1685 {
1686 if (!a)
1687 return !b;
1688 if (!b)
1689 return !a;
1690
1691 unsigned length = a->length();
1692
1693 if (a->is8Bit()) {
1694 const LChar* aPtr = a->characters8();
1695 for (unsigned i = 0; i != length; ++i) {
1696 LChar bc = b[i];
1697 LChar ac = aPtr[i];
1698 if (!bc)
1699 return false;
1700 if (ac != bc)
1701 return false;
1702 }
1703
1704 return !b[length];
1705 }
1706
1707 const UChar* aPtr = a->characters16();
1708 for (unsigned i = 0; i != length; ++i) {
1709 LChar bc = b[i];
1710 if (!bc)
1711 return false;
1712 if (aPtr[i] != bc)
1713 return false;
1714 }
1715
1716 return !b[length];
1717 }
1718
1719 bool equal(const StringImpl* a, const UChar* b, unsigned length)
1720 {
1721 if (!a)
1722 return !b;
1723 if (!b)
1724 return false;
1725
1726 if (a->length() != length)
1727 return false;
1728 if (a->is8Bit())
1729 return equal(a->characters8(), b, length);
1730 return equal(a->characters16(), b, length);
1731 }
1732
1733 bool equalNonNull(const StringImpl* a, const StringImpl* b)
1734 {
1735 ASSERT(a && b);
1736 if (a == b)
1737 return true;
1738
1739 return stringImplContentEqual(a, b);
1740 }
1741
1742 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
1743 {
1744 if (a == b)
1745 return true;
1746 if (!a || !b)
1747 return false;
1748
1749 return CaseFoldingHash::equal(a, b);
1750 }
1751
1752 bool equalIgnoringCase(const StringImpl* a, const LChar* b)
1753 {
1754 if (!a)
1755 return !b;
1756 if (!b)
1757 return !a;
1758
1759 unsigned length = a->length();
1760
1761 // Do a faster loop for the case where all the characters are ASCII.
1762 UChar ored = 0;
1763 bool equal = true;
1764 if (a->is8Bit()) {
1765 const LChar* as = a->characters8();
1766 for (unsigned i = 0; i != length; ++i) {
1767 LChar bc = b[i];
1768 if (!bc)
1769 return false;
1770 UChar ac = as[i];
1771 ored |= ac;
1772 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
1773 }
1774
1775 // Do a slower implementation for cases that include non-ASCII character s.
1776 if (ored & ~0x7F) {
1777 equal = true;
1778 for (unsigned i = 0; i != length; ++i)
1779 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
1780 }
1781
1782 return equal && !b[length];
1783 }
1784
1785 const UChar* as = a->characters16();
1786 for (unsigned i = 0; i != length; ++i) {
1787 LChar bc = b[i];
1788 if (!bc)
1789 return false;
1790 UChar ac = as[i];
1791 ored |= ac;
1792 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
1793 }
1794
1795 // Do a slower implementation for cases that include non-ASCII characters.
1796 if (ored & ~0x7F) {
1797 equal = true;
1798 for (unsigned i = 0; i != length; ++i) {
1799 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
1800 }
1801 }
1802
1803 return equal && !b[length];
1804 }
1805
1806 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)
1807 {
1808 ASSERT(a && b);
1809 if (a == b)
1810 return true;
1811
1812 unsigned length = a->length();
1813 if (length != b->length())
1814 return false;
1815
1816 if (a->is8Bit()) {
1817 if (b->is8Bit())
1818 return equalIgnoringCase(a->characters8(), b->characters8(), length) ;
1819
1820 return equalIgnoringCase(b->characters16(), a->characters8(), length);
1821 }
1822
1823 if (b->is8Bit())
1824 return equalIgnoringCase(a->characters16(), b->characters8(), length);
1825
1826 return equalIgnoringCase(a->characters16(), b->characters16(), length);
1827 }
1828
1829 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
1830 {
1831 if (!a && b && !b->length())
1832 return true;
1833 if (!b && a && !a->length())
1834 return true;
1835 return equal(a, b);
1836 }
1837
1838 WTF::Unicode::Direction StringImpl::defaultWritingDirection(bool* hasStrongDirec tionality)
1839 {
1840 for (unsigned i = 0; i < m_length; ++i) {
1841 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(is8Bit() ? m_data8[i] : m_data16[i]);
1842 if (charDirection == WTF::Unicode::LeftToRight) {
1843 if (hasStrongDirectionality)
1844 *hasStrongDirectionality = true;
1845 return WTF::Unicode::LeftToRight;
1846 }
1847 if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF:: Unicode::RightToLeftArabic) {
1848 if (hasStrongDirectionality)
1849 *hasStrongDirectionality = true;
1850 return WTF::Unicode::RightToLeft;
1851 }
1852 }
1853 if (hasStrongDirectionality)
1854 *hasStrongDirectionality = false;
1855 return WTF::Unicode::LeftToRight;
1856 }
1857
1858 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer<LChar>& buffer)
1859 {
1860 unsigned length = buffer.length();
1861 if (!length)
1862 return empty();
1863 return adoptRef(new StringImpl(buffer.release(), length));
1864 }
1865
1866 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer<UChar>& buffer)
1867 {
1868 unsigned length = buffer.length();
1869 if (!length)
1870 return empty();
1871 return adoptRef(new StringImpl(buffer.release(), length));
1872 }
1873
1874 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const Stri ngImpl& string)
1875 {
1876 // Use createUninitialized instead of 'new StringImpl' so that the string an d its buffer
1877 // get allocated in a single memory block.
1878 unsigned length = string.m_length;
1879 RELEASE_ASSERT(length < numeric_limits<unsigned>::max());
1880 RefPtr<StringImpl> terminatedString;
1881 if (string.is8Bit()) {
1882 LChar* data;
1883 terminatedString = createUninitialized(length + 1, data);
1884 memcpy(data, string.m_data8, length * sizeof(LChar));
1885 data[length] = 0;
1886 } else {
1887 UChar* data;
1888 terminatedString = createUninitialized(length + 1, data);
1889 memcpy(data, string.m_data16, length * sizeof(UChar));
1890 data[length] = 0;
1891 }
1892 --(terminatedString->m_length);
1893 terminatedString->m_hashAndFlags = (string.m_hashAndFlags & (~s_flagMask | s _hashFlag8BitBuffer)) | s_hashFlagHasTerminatingNullCharacter;
1894 return terminatedString.release();
1895 }
1896
1897 size_t StringImpl::sizeInBytes() const
1898 {
1899 // FIXME: support substrings
1900 size_t size = length();
1901 if (is8Bit()) {
1902 if (has16BitShadow()) {
1903 size += 2 * size;
1904 if (hasTerminatingNullCharacter())
1905 size += 2;
1906 }
1907 } else
1908 size *= 2;
1909 return size + sizeof(*this);
1910 }
1911
1912 } // namespace WTF
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698