OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | |
3 * (C) 1999 Antti Koivisto (koivisto@kde.org) | |
4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) | |
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All r
ights reserved. | |
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) | |
7 * | |
8 * This library is free software; you can redistribute it and/or | |
9 * modify it under the terms of the GNU Library General Public | |
10 * License as published by the Free Software Foundation; either | |
11 * version 2 of the License, or (at your option) any later version. | |
12 * | |
13 * This library is distributed in the hope that it will be useful, | |
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 * Library General Public License for more details. | |
17 * | |
18 * You should have received a copy of the GNU Library General Public License | |
19 * along with this library; see the file COPYING.LIB. If not, write to | |
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
21 * Boston, MA 02110-1301, USA. | |
22 * | |
23 */ | |
24 | |
25 #include "config.h" | |
26 #include "StringImpl.h" | |
27 | |
28 #include "AtomicString.h" | |
29 #include "StringBuffer.h" | |
30 #include "StringHash.h" | |
31 #include <wtf/ProcessID.h> | |
32 #include <wtf/StdLibExtras.h> | |
33 #include <wtf/WTFThreadData.h> | |
34 #include <wtf/unicode/CharacterNames.h> | |
35 | |
36 #ifdef STRING_STATS | |
37 #include <unistd.h> | |
38 #include <wtf/DataLog.h> | |
39 #endif | |
40 | |
41 using namespace std; | |
42 | |
43 namespace WTF { | |
44 | |
45 using namespace Unicode; | |
46 | |
// Compile-time guard: fails the build if sizeof(StringImpl) stops being exactly
// two ints plus three pointers.
47 COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), String
Impl_should_stay_small); | |
48 | |
49 #ifdef STRING_STATS | |
// Definition of the StringStats instance declared as a static member of StringImpl.
50 StringStats StringImpl::m_stringStats; | |
51 | |
// Countdown of removals left before removeString() calls printStats(); it starts
// at s_printStringStatsFrequency.
52 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printString
StatsFrequency; | |
53 | |
54 void StringStats::removeString(StringImpl* string) | |
55 { | |
56 unsigned length = string->length(); | |
57 bool isSubString = string->isSubString(); | |
58 | |
59 --m_totalNumberStrings; | |
60 | |
61 if (string->has16BitShadow()) { | |
62 --m_numberUpconvertedStrings; | |
63 if (!isSubString) | |
64 m_totalUpconvertedData -= length; | |
65 } | |
66 | |
67 if (string->is8Bit()) { | |
68 --m_number8BitStrings; | |
69 if (!isSubString) | |
70 m_total8BitData -= length; | |
71 } else { | |
72 --m_number16BitStrings; | |
73 if (!isSubString) | |
74 m_total16BitData -= length; | |
75 } | |
76 | |
77 if (!--s_stringRemovesTillPrintStats) { | |
78 s_stringRemovesTillPrintStats = s_printStringStatsFrequency; | |
79 printStats(); | |
80 } | |
81 } | |
82 | |
83 void StringStats::printStats() | |
84 { | |
85 dataLogF("String stats for process id %d:\n", getCurrentProcessID()); | |
86 | |
87 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitDat
a; | |
88 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 1
00) / (double)m_totalNumberStrings : 0.0; | |
89 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (
double)m_number8BitStrings : 0.0; | |
90 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length
%6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, av
erage8bitLength); | |
91 | |
92 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings *
100) / (double)m_totalNumberStrings : 0.0; | |
93 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData
/ (double)m_number16BitStrings : 0.0; | |
94 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length
%6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData
* 2, average16bitLength); | |
95 | |
96 double percentUpconverted = m_totalNumberStrings ? ((double)m_numberUpconver
tedStrings * 100) / (double)m_number8BitStrings : 0.0; | |
97 double averageUpconvertedLength = m_numberUpconvertedStrings ? (double)m_tot
alUpconvertedData / (double)m_numberUpconvertedStrings : 0.0; | |
98 dataLogF("%8u (%5.2f%%) upconverted %12llu chars %12llu bytes avg length
%6.1f\n", m_numberUpconvertedStrings, percentUpconverted, m_totalUpconvertedData
, m_totalUpconvertedData * 2, averageUpconvertedLength); | |
99 | |
100 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters
/ (double)m_totalNumberStrings : 0.0; | |
101 unsigned long long totalDataBytes = m_total8BitData + (m_total16BitData + m_
totalUpconvertedData) * 2; | |
102 dataLogF("%8u Total %12llu chars %12llu bytes avg length %
6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLen
gth); | |
103 unsigned long long totalSavedBytes = m_total8BitData - m_totalUpconvertedDat
a; | |
104 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) /
(double)(totalDataBytes + totalSavedBytes) : 0.0; | |
105 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes,
percentSavings); | |
106 } | |
107 #endif | |
108 | |
109 | |
110 StringImpl::~StringImpl() | |
111 { | |
112 ASSERT(!isStatic()); | |
113 | |
114 STRING_STATS_REMOVE_STRING(this); | |
115 | |
116 if (isAtomic()) | |
117 AtomicString::remove(this); | |
118 | |
119 BufferOwnership ownership = bufferOwnership(); | |
120 | |
121 if (has16BitShadow()) { | |
122 ASSERT(m_copyData16); | |
123 fastFree(m_copyData16); | |
124 } | |
125 | |
126 if (ownership == BufferInternal) | |
127 return; | |
128 if (ownership == BufferOwned) { | |
129 // We use m_data8, but since it is a union with m_data16 this works eith
er way. | |
130 ASSERT(m_data8); | |
131 fastFree(const_cast<LChar*>(m_data8)); | |
132 return; | |
133 } | |
134 ASSERT(ownership == BufferSubstring); | |
135 ASSERT(m_substringBuffer); | |
136 m_substringBuffer->deref(); | |
137 } | |
138 | |
139 PassRefPtr<StringImpl> StringImpl::createFromLiteral(const char* characters, uns
igned length) | |
140 { | |
141 ASSERT_WITH_MESSAGE(length, "Use StringImpl::empty() to create an empty stri
ng"); | |
142 ASSERT(charactersAreAllASCII<LChar>(reinterpret_cast<const LChar*>(character
s), length)); | |
143 return adoptRef(new StringImpl(characters, length, ConstructFromLiteral)); | |
144 } | |
145 | |
146 PassRefPtr<StringImpl> StringImpl::createFromLiteral(const char* characters) | |
147 { | |
148 size_t length = strlen(characters); | |
149 ASSERT_WITH_MESSAGE(length, "Use StringImpl::empty() to create an empty stri
ng"); | |
150 ASSERT(charactersAreAllASCII<LChar>(reinterpret_cast<const LChar*>(character
s), length)); | |
151 return adoptRef(new StringImpl(characters, length, ConstructFromLiteral)); | |
152 } | |
153 | |
154 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*&
data) | |
155 { | |
156 if (!length) { | |
157 data = 0; | |
158 return empty(); | |
159 } | |
160 | |
161 // Allocate a single buffer large enough to contain the StringImpl | |
162 // struct as well as the data which it contains. This removes one | |
163 // heap allocation from this call. | |
164 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str
ingImpl)) / sizeof(LChar))); | |
165 size_t size = sizeof(StringImpl) + length * sizeof(LChar); | |
166 StringImpl* string = static_cast<StringImpl*>(fastMalloc(size)); | |
167 | |
168 data = reinterpret_cast<LChar*>(string + 1); | |
169 return adoptRef(new (NotNull, string) StringImpl(length, Force8BitConstructo
r)); | |
170 } | |
171 | |
172 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*&
data) | |
173 { | |
174 if (!length) { | |
175 data = 0; | |
176 return empty(); | |
177 } | |
178 | |
179 // Allocate a single buffer large enough to contain the StringImpl | |
180 // struct as well as the data which it contains. This removes one | |
181 // heap allocation from this call. | |
182 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str
ingImpl)) / sizeof(UChar))); | |
183 size_t size = sizeof(StringImpl) + length * sizeof(UChar); | |
184 StringImpl* string = static_cast<StringImpl*>(fastMalloc(size)); | |
185 | |
186 data = reinterpret_cast<UChar*>(string + 1); | |
187 return adoptRef(new (NotNull, string) StringImpl(length)); | |
188 } | |
189 | |
190 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalStr
ing, unsigned length, LChar*& data) | |
191 { | |
192 ASSERT(originalString->is8Bit()); | |
193 ASSERT(originalString->hasOneRef()); | |
194 ASSERT(originalString->bufferOwnership() == BufferInternal); | |
195 | |
196 if (!length) { | |
197 data = 0; | |
198 return empty(); | |
199 } | |
200 | |
201 // Same as createUninitialized() except here we use fastRealloc. | |
202 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str
ingImpl)) / sizeof(LChar))); | |
203 size_t size = sizeof(StringImpl) + length * sizeof(LChar); | |
204 originalString->~StringImpl(); | |
205 StringImpl* string = static_cast<StringImpl*>(fastRealloc(originalString.lea
kRef(), size)); | |
206 | |
207 data = reinterpret_cast<LChar*>(string + 1); | |
208 return adoptRef(new (NotNull, string) StringImpl(length, Force8BitConstructo
r)); | |
209 } | |
210 | |
211 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalStr
ing, unsigned length, UChar*& data) | |
212 { | |
213 ASSERT(!originalString->is8Bit()); | |
214 ASSERT(originalString->hasOneRef()); | |
215 ASSERT(originalString->bufferOwnership() == BufferInternal); | |
216 | |
217 if (!length) { | |
218 data = 0; | |
219 return empty(); | |
220 } | |
221 | |
222 // Same as createUninitialized() except here we use fastRealloc. | |
223 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str
ingImpl)) / sizeof(UChar))); | |
224 size_t size = sizeof(StringImpl) + length * sizeof(UChar); | |
225 originalString->~StringImpl(); | |
226 StringImpl* string = static_cast<StringImpl*>(fastRealloc(originalString.lea
kRef(), size)); | |
227 | |
228 data = reinterpret_cast<UChar*>(string + 1); | |
229 return adoptRef(new (NotNull, string) StringImpl(length)); | |
230 } | |
231 | |
232 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned leng
th) | |
233 { | |
234 if (!characters || !length) | |
235 return empty(); | |
236 | |
237 UChar* data; | |
238 RefPtr<StringImpl> string = createUninitialized(length, data); | |
239 memcpy(data, characters, length * sizeof(UChar)); | |
240 return string.release(); | |
241 } | |
242 | |
243 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned leng
th) | |
244 { | |
245 if (!characters || !length) | |
246 return empty(); | |
247 | |
248 LChar* data; | |
249 RefPtr<StringImpl> string = createUninitialized(length, data); | |
250 memcpy(data, characters, length * sizeof(LChar)); | |
251 return string.release(); | |
252 } | |
253 | |
254 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters,
unsigned length) | |
255 { | |
256 if (!characters || !length) | |
257 return empty(); | |
258 | |
259 LChar* data; | |
260 RefPtr<StringImpl> string = createUninitialized(length, data); | |
261 | |
262 for (size_t i = 0; i < length; ++i) { | |
263 if (characters[i] & 0xff00) | |
264 return create(characters, length); | |
265 data[i] = static_cast<LChar>(characters[i]); | |
266 } | |
267 | |
268 return string.release(); | |
269 } | |
270 | |
271 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) | |
272 { | |
273 if (!string) | |
274 return empty(); | |
275 size_t length = strlen(reinterpret_cast<const char*>(string)); | |
276 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); | |
277 return create(string, length); | |
278 } | |
279 | |
280 const UChar* StringImpl::getData16SlowCase() const | |
281 { | |
282 if (has16BitShadow()) | |
283 return m_copyData16; | |
284 | |
285 if (bufferOwnership() == BufferSubstring) { | |
286 // If this is a substring, return a pointer into the parent string. | |
287 // TODO: Consider severing this string from the parent string | |
288 unsigned offset = m_data8 - m_substringBuffer->characters8(); | |
289 return m_substringBuffer->characters() + offset; | |
290 } | |
291 | |
292 STRING_STATS_ADD_UPCONVERTED_STRING(m_length); | |
293 | |
294 unsigned len = length(); | |
295 if (hasTerminatingNullCharacter()) | |
296 ++len; | |
297 | |
298 m_copyData16 = static_cast<UChar*>(fastMalloc(len * sizeof(UChar))); | |
299 | |
300 m_hashAndFlags |= s_hashFlagHas16BitShadow; | |
301 | |
302 upconvertCharacters(0, len); | |
303 | |
304 return m_copyData16; | |
305 } | |
306 | |
307 void StringImpl::upconvertCharacters(unsigned start, unsigned end) const | |
308 { | |
309 ASSERT(is8Bit()); | |
310 ASSERT(has16BitShadow()); | |
311 | |
312 for (size_t i = start; i < end; ++i) | |
313 m_copyData16[i] = m_data8[i]; | |
314 } | |
315 | |
316 | |
317 bool StringImpl::containsOnlyWhitespace() | |
318 { | |
319 // FIXME: The definition of whitespace here includes a number of characters | |
320 // that are not whitespace from the point of view of RenderText; I wonder if | |
321 // that's a problem in practice. | |
322 if (is8Bit()) { | |
323 for (unsigned i = 0; i < m_length; ++i) { | |
324 UChar c = m_data8[i]; | |
325 if (!isASCIISpace(c)) | |
326 return false; | |
327 } | |
328 | |
329 return true; | |
330 } | |
331 | |
332 for (unsigned i = 0; i < m_length; ++i) { | |
333 UChar c = m_data16[i]; | |
334 if (!isASCIISpace(c)) | |
335 return false; | |
336 } | |
337 return true; | |
338 } | |
339 | |
340 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) | |
341 { | |
342 if (start >= m_length) | |
343 return empty(); | |
344 unsigned maxLength = m_length - start; | |
345 if (length >= maxLength) { | |
346 if (!start) | |
347 return this; | |
348 length = maxLength; | |
349 } | |
350 if (is8Bit()) | |
351 return create(m_data8 + start, length); | |
352 | |
353 return create(m_data16 + start, length); | |
354 } | |
355 | |
356 UChar32 StringImpl::characterStartingAt(unsigned i) | |
357 { | |
358 if (is8Bit()) | |
359 return m_data8[i]; | |
360 if (U16_IS_SINGLE(m_data16[i])) | |
361 return m_data16[i]; | |
362 if (i + 1 < m_length && U16_IS_LEAD(m_data16[i]) && U16_IS_TRAIL(m_data16[i
+ 1])) | |
363 return U16_GET_SUPPLEMENTARY(m_data16[i], m_data16[i + 1]); | |
364 return 0; | |
365 } | |
366 | |
367 PassRefPtr<StringImpl> StringImpl::lower() | |
368 { | |
369 // Note: This is a hot function in the Dromaeo benchmark, specifically the | |
370 // no-op code path up through the first 'return' statement. | |
371 | |
372 // First scan the string for uppercase and non-ASCII characters: | |
373 bool noUpper = true; | |
374 UChar ored = 0; | |
375 if (is8Bit()) { | |
376 const LChar* end = m_data8 + m_length; | |
377 for (const LChar* chp = m_data8; chp != end; ++chp) { | |
378 if (UNLIKELY(isASCIIUpper(*chp))) | |
379 noUpper = false; | |
380 ored |= *chp; | |
381 } | |
382 // Nothing to do if the string is all ASCII with no uppercase. | |
383 if (noUpper && !(ored & ~0x7F)) | |
384 return this; | |
385 | |
386 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>
::max())); | |
387 int32_t length = m_length; | |
388 | |
389 LChar* data8; | |
390 RefPtr<StringImpl> newImpl = createUninitialized(length, data8); | |
391 | |
392 if (!(ored & ~0x7F)) { | |
393 for (int32_t i = 0; i < length; ++i) | |
394 data8[i] = toASCIILower(m_data8[i]); | |
395 | |
396 return newImpl.release(); | |
397 } | |
398 | |
399 // Do a slower implementation for cases that include non-ASCII Latin-1 c
haracters. | |
400 for (int32_t i = 0; i < length; ++i) | |
401 data8[i] = static_cast<LChar>(Unicode::toLower(m_data8[i])); | |
402 | |
403 return newImpl.release(); | |
404 } | |
405 | |
406 const UChar *end = m_data16 + m_length; | |
407 for (const UChar* chp = m_data16; chp != end; ++chp) { | |
408 if (UNLIKELY(isASCIIUpper(*chp))) | |
409 noUpper = false; | |
410 ored |= *chp; | |
411 } | |
412 // Nothing to do if the string is all ASCII with no uppercase. | |
413 if (noUpper && !(ored & ~0x7F)) | |
414 return this; | |
415 | |
416 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma
x())); | |
417 int32_t length = m_length; | |
418 | |
419 if (!(ored & ~0x7F)) { | |
420 UChar* data16; | |
421 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
422 | |
423 for (int32_t i = 0; i < length; ++i) { | |
424 UChar c = m_data16[i]; | |
425 data16[i] = toASCIILower(c); | |
426 } | |
427 return newImpl.release(); | |
428 } | |
429 | |
430 // Do a slower implementation for cases that include non-ASCII characters. | |
431 UChar* data16; | |
432 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
433 | |
434 bool error; | |
435 int32_t realLength = Unicode::toLower(data16, length, m_data16, m_length, &e
rror); | |
436 if (!error && realLength == length) | |
437 return newImpl.release(); | |
438 | |
439 newImpl = createUninitialized(realLength, data16); | |
440 Unicode::toLower(data16, realLength, m_data16, m_length, &error); | |
441 if (error) | |
442 return this; | |
443 return newImpl.release(); | |
444 } | |
445 | |
446 PassRefPtr<StringImpl> StringImpl::upper() | |
447 { | |
448 // This function could be optimized for no-op cases the way lower() is, | |
449 // but in empirical testing, few actual calls to upper() are no-ops, so | |
450 // it wouldn't be worth the extra time for pre-scanning. | |
451 | |
452 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma
x())); | |
453 int32_t length = m_length; | |
454 | |
455 if (is8Bit()) { | |
456 LChar* data8; | |
457 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | |
458 | |
459 // Do a faster loop for the case where all the characters are ASCII. | |
460 LChar ored = 0; | |
461 for (int i = 0; i < length; ++i) { | |
462 LChar c = m_data8[i]; | |
463 ored |= c; | |
464 data8[i] = toASCIIUpper(c); | |
465 } | |
466 if (!(ored & ~0x7F)) | |
467 return newImpl.release(); | |
468 | |
469 // Do a slower implementation for cases that include non-ASCII Latin-1 c
haracters. | |
470 int numberSharpSCharacters = 0; | |
471 | |
472 // There are two special cases. | |
473 // 1. latin-1 characters when converted to upper case are 16 bit charac
ters. | |
474 // 2. Lower case sharp-S converts to "SS" (two characters) | |
475 for (int32_t i = 0; i < length; ++i) { | |
476 LChar c = m_data8[i]; | |
477 if (UNLIKELY(c == smallLetterSharpS)) | |
478 ++numberSharpSCharacters; | |
479 UChar upper = Unicode::toUpper(c); | |
480 if (UNLIKELY(upper > 0xff)) { | |
481 // Since this upper-cased character does not fit in an 8-bit str
ing, we need to take the 16-bit path. | |
482 goto upconvert; | |
483 } | |
484 data8[i] = static_cast<LChar>(upper); | |
485 } | |
486 | |
487 if (!numberSharpSCharacters) | |
488 return newImpl.release(); | |
489 | |
490 // We have numberSSCharacters sharp-s characters, but none of the other
special characters. | |
491 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); | |
492 | |
493 LChar* dest = data8; | |
494 | |
495 for (int32_t i = 0; i < length; ++i) { | |
496 LChar c = m_data8[i]; | |
497 if (c == smallLetterSharpS) { | |
498 *dest++ = 'S'; | |
499 *dest++ = 'S'; | |
500 } else | |
501 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); | |
502 } | |
503 | |
504 return newImpl.release(); | |
505 } | |
506 | |
507 upconvert: | |
508 const UChar* source16 = characters(); | |
509 | |
510 UChar* data16; | |
511 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | |
512 | |
513 // Do a faster loop for the case where all the characters are ASCII. | |
514 UChar ored = 0; | |
515 for (int i = 0; i < length; ++i) { | |
516 UChar c = source16[i]; | |
517 ored |= c; | |
518 data16[i] = toASCIIUpper(c); | |
519 } | |
520 if (!(ored & ~0x7F)) | |
521 return newImpl.release(); | |
522 | |
523 // Do a slower implementation for cases that include non-ASCII characters. | |
524 bool error; | |
525 newImpl = createUninitialized(m_length, data16); | |
526 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &e
rror); | |
527 if (!error && realLength == length) | |
528 return newImpl; | |
529 newImpl = createUninitialized(realLength, data16); | |
530 Unicode::toUpper(data16, realLength, source16, m_length, &error); | |
531 if (error) | |
532 return this; | |
533 return newImpl.release(); | |
534 } | |
535 | |
536 PassRefPtr<StringImpl> StringImpl::fill(UChar character) | |
537 { | |
538 if (!m_length) | |
539 return this; | |
540 | |
541 if (!(character & ~0x7F)) { | |
542 LChar* data; | |
543 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
544 for (unsigned i = 0; i < m_length; ++i) | |
545 data[i] = character; | |
546 return newImpl.release(); | |
547 } | |
548 UChar* data; | |
549 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
550 for (unsigned i = 0; i < m_length; ++i) | |
551 data[i] = character; | |
552 return newImpl.release(); | |
553 } | |
554 | |
555 PassRefPtr<StringImpl> StringImpl::foldCase() | |
556 { | |
557 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma
x())); | |
558 int32_t length = m_length; | |
559 | |
560 if (is8Bit()) { | |
561 // Do a faster loop for the case where all the characters are ASCII. | |
562 LChar* data; | |
563 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data); | |
564 LChar ored = 0; | |
565 | |
566 for (int32_t i = 0; i < length; ++i) { | |
567 LChar c = m_data8[i]; | |
568 data[i] = toASCIILower(c); | |
569 ored |= c; | |
570 } | |
571 | |
572 if (!(ored & ~0x7F)) | |
573 return newImpl.release(); | |
574 | |
575 // Do a slower implementation for cases that include non-ASCII Latin-1 c
haracters. | |
576 for (int32_t i = 0; i < length; ++i) | |
577 data[i] = static_cast<LChar>(Unicode::toLower(m_data8[i])); | |
578 | |
579 return newImpl.release(); | |
580 } | |
581 | |
582 // Do a faster loop for the case where all the characters are ASCII. | |
583 UChar* data; | |
584 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
585 UChar ored = 0; | |
586 for (int32_t i = 0; i < length; ++i) { | |
587 UChar c = m_data16[i]; | |
588 ored |= c; | |
589 data[i] = toASCIILower(c); | |
590 } | |
591 if (!(ored & ~0x7F)) | |
592 return newImpl.release(); | |
593 | |
594 // Do a slower implementation for cases that include non-ASCII characters. | |
595 bool error; | |
596 int32_t realLength = Unicode::foldCase(data, length, m_data16, m_length, &er
ror); | |
597 if (!error && realLength == length) | |
598 return newImpl.release(); | |
599 newImpl = createUninitialized(realLength, data); | |
600 Unicode::foldCase(data, realLength, m_data16, m_length, &error); | |
601 if (error) | |
602 return this; | |
603 return newImpl.release(); | |
604 } | |
605 | |
606 template <class UCharPredicate> | |
607 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate
predicate) | |
608 { | |
609 if (!m_length) | |
610 return empty(); | |
611 | |
612 unsigned start = 0; | |
613 unsigned end = m_length - 1; | |
614 | |
615 // skip white space from start | |
616 while (start <= end && predicate(is8Bit() ? m_data8[start] : m_data16[start]
)) | |
617 ++start; | |
618 | |
619 // only white space | |
620 if (start > end) | |
621 return empty(); | |
622 | |
623 // skip white space from end | |
624 while (end && predicate(is8Bit() ? m_data8[end] : m_data16[end])) | |
625 --end; | |
626 | |
627 if (!start && end == m_length - 1) | |
628 return this; | |
629 if (is8Bit()) | |
630 return create(m_data8 + start, end + 1 - start); | |
631 return create(m_data16 + start, end + 1 - start); | |
632 } | |
633 | |
634 class UCharPredicate { | |
635 public: | |
636 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(functi
on) { } | |
637 | |
638 inline bool operator()(UChar ch) const | |
639 { | |
640 return m_function(ch); | |
641 } | |
642 | |
643 private: | |
644 const CharacterMatchFunctionPtr m_function; | |
645 }; | |
646 | |
647 class SpaceOrNewlinePredicate { | |
648 public: | |
649 inline bool operator()(UChar ch) const | |
650 { | |
651 return isSpaceOrNewline(ch); | |
652 } | |
653 }; | |
654 | |
655 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() | |
656 { | |
657 return stripMatchedCharacters(SpaceOrNewlinePredicate()); | |
658 } | |
659 | |
660 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhi
teSpace) | |
661 { | |
662 return stripMatchedCharacters(UCharPredicate(isWhiteSpace)); | |
663 } | |
664 | |
665 template <typename CharType> | |
666 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType
* characters, CharacterMatchFunctionPtr findMatch) | |
667 { | |
668 const CharType* from = characters; | |
669 const CharType* fromend = from + m_length; | |
670 | |
671 // Assume the common case will not remove any characters | |
672 while (from != fromend && !findMatch(*from)) | |
673 ++from; | |
674 if (from == fromend) | |
675 return this; | |
676 | |
677 StringBuffer<CharType> data(m_length); | |
678 CharType* to = data.characters(); | |
679 unsigned outc = from - characters; | |
680 | |
681 if (outc) | |
682 memcpy(to, characters, outc * sizeof(CharType)); | |
683 | |
684 while (true) { | |
685 while (from != fromend && findMatch(*from)) | |
686 ++from; | |
687 while (from != fromend && !findMatch(*from)) | |
688 to[outc++] = *from++; | |
689 if (from == fromend) | |
690 break; | |
691 } | |
692 | |
693 data.shrink(outc); | |
694 | |
695 return adopt(data); | |
696 } | |
697 | |
698 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr fi
ndMatch) | |
699 { | |
700 if (is8Bit()) | |
701 return removeCharacters(characters8(), findMatch); | |
702 return removeCharacters(characters16(), findMatch); | |
703 } | |
704 | |
705 template <typename CharType, class UCharPredicate> | |
706 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UChar
Predicate predicate) | |
707 { | |
708 StringBuffer<CharType> data(m_length); | |
709 | |
710 const CharType* from = getCharacters<CharType>(); | |
711 const CharType* fromend = from + m_length; | |
712 int outc = 0; | |
713 bool changedToSpace = false; | |
714 | |
715 CharType* to = data.characters(); | |
716 | |
717 while (true) { | |
718 while (from != fromend && predicate(*from)) { | |
719 if (*from != ' ') | |
720 changedToSpace = true; | |
721 ++from; | |
722 } | |
723 while (from != fromend && !predicate(*from)) | |
724 to[outc++] = *from++; | |
725 if (from != fromend) | |
726 to[outc++] = ' '; | |
727 else | |
728 break; | |
729 } | |
730 | |
731 if (outc > 0 && to[outc - 1] == ' ') | |
732 --outc; | |
733 | |
734 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) | |
735 return this; | |
736 | |
737 data.shrink(outc); | |
738 | |
739 return adopt(data); | |
740 } | |
741 | |
742 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace() | |
743 { | |
744 if (is8Bit()) | |
745 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlin
ePredicate()); | |
746 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePre
dicate()); | |
747 } | |
748 | |
749 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr is
WhiteSpace) | |
750 { | |
751 if (is8Bit()) | |
752 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicat
e(isWhiteSpace)); | |
753 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(is
WhiteSpace)); | |
754 } | |
755 | |
756 int StringImpl::toIntStrict(bool* ok, int base) | |
757 { | |
758 if (is8Bit()) | |
759 return charactersToIntStrict(characters8(), m_length, ok, base); | |
760 return charactersToIntStrict(characters16(), m_length, ok, base); | |
761 } | |
762 | |
763 unsigned StringImpl::toUIntStrict(bool* ok, int base) | |
764 { | |
765 if (is8Bit()) | |
766 return charactersToUIntStrict(characters8(), m_length, ok, base); | |
767 return charactersToUIntStrict(characters16(), m_length, ok, base); | |
768 } | |
769 | |
770 int64_t StringImpl::toInt64Strict(bool* ok, int base) | |
771 { | |
772 if (is8Bit()) | |
773 return charactersToInt64Strict(characters8(), m_length, ok, base); | |
774 return charactersToInt64Strict(characters16(), m_length, ok, base); | |
775 } | |
776 | |
777 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) | |
778 { | |
779 if (is8Bit()) | |
780 return charactersToUInt64Strict(characters8(), m_length, ok, base); | |
781 return charactersToUInt64Strict(characters16(), m_length, ok, base); | |
782 } | |
783 | |
784 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base) | |
785 { | |
786 if (is8Bit()) | |
787 return charactersToIntPtrStrict(characters8(), m_length, ok, base); | |
788 return charactersToIntPtrStrict(characters16(), m_length, ok, base); | |
789 } | |
790 | |
791 int StringImpl::toInt(bool* ok) | |
792 { | |
793 if (is8Bit()) | |
794 return charactersToInt(characters8(), m_length, ok); | |
795 return charactersToInt(characters16(), m_length, ok); | |
796 } | |
797 | |
798 unsigned StringImpl::toUInt(bool* ok) | |
799 { | |
800 if (is8Bit()) | |
801 return charactersToUInt(characters8(), m_length, ok); | |
802 return charactersToUInt(characters16(), m_length, ok); | |
803 } | |
804 | |
805 int64_t StringImpl::toInt64(bool* ok) | |
806 { | |
807 if (is8Bit()) | |
808 return charactersToInt64(characters8(), m_length, ok); | |
809 return charactersToInt64(characters16(), m_length, ok); | |
810 } | |
811 | |
812 uint64_t StringImpl::toUInt64(bool* ok) | |
813 { | |
814 if (is8Bit()) | |
815 return charactersToUInt64(characters8(), m_length, ok); | |
816 return charactersToUInt64(characters16(), m_length, ok); | |
817 } | |
818 | |
819 intptr_t StringImpl::toIntPtr(bool* ok) | |
820 { | |
821 if (is8Bit()) | |
822 return charactersToIntPtr(characters8(), m_length, ok); | |
823 return charactersToIntPtr(characters16(), m_length, ok); | |
824 } | |
825 | |
826 double StringImpl::toDouble(bool* ok) | |
827 { | |
828 if (is8Bit()) | |
829 return charactersToDouble(characters8(), m_length, ok); | |
830 return charactersToDouble(characters16(), m_length, ok); | |
831 } | |
832 | |
833 float StringImpl::toFloat(bool* ok) | |
834 { | |
835 if (is8Bit()) | |
836 return charactersToFloat(characters8(), m_length, ok); | |
837 return charactersToFloat(characters16(), m_length, ok); | |
838 } | |
839 | |
840 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) | |
841 { | |
842 while (length--) { | |
843 LChar bc = *b++; | |
844 if (foldCase(*a++) != foldCase(bc)) | |
845 return false; | |
846 } | |
847 return true; | |
848 } | |
849 | |
850 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) | |
851 { | |
852 while (length--) { | |
853 LChar bc = *b++; | |
854 if (foldCase(*a++) != foldCase(bc)) | |
855 return false; | |
856 } | |
857 return true; | |
858 } | |
859 | |
860 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) | |
861 { | |
862 if (is8Bit()) | |
863 return WTF::find(characters8(), m_length, matchFunction, start); | |
864 return WTF::find(characters16(), m_length, matchFunction, start); | |
865 } | |
866 | |
867 size_t StringImpl::find(const LChar* matchString, unsigned index) | |
868 { | |
869 // Check for null or empty string to match against | |
870 if (!matchString) | |
871 return notFound; | |
872 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)
); | |
873 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); | |
874 unsigned matchLength = matchStringLength; | |
875 if (!matchLength) | |
876 return min(index, length()); | |
877 | |
878 // Optimization 1: fast case for strings of length 1. | |
879 if (matchLength == 1) | |
880 return WTF::find(characters16(), length(), *matchString, index); | |
881 | |
882 // Check index & matchLength are in range. | |
883 if (index > length()) | |
884 return notFound; | |
885 unsigned searchLength = length() - index; | |
886 if (matchLength > searchLength) | |
887 return notFound; | |
888 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
889 unsigned delta = searchLength - matchLength; | |
890 | |
891 const UChar* searchCharacters = characters() + index; | |
892 | |
893 // Optimization 2: keep a running hash of the strings, | |
894 // only call equal if the hashes match. | |
895 unsigned searchHash = 0; | |
896 unsigned matchHash = 0; | |
897 for (unsigned i = 0; i < matchLength; ++i) { | |
898 searchHash += searchCharacters[i]; | |
899 matchHash += matchString[i]; | |
900 } | |
901 | |
902 unsigned i = 0; | |
903 // keep looping until we match | |
904 while (searchHash != matchHash || !equal(searchCharacters + i, matchString,
matchLength)) { | |
905 if (i == delta) | |
906 return notFound; | |
907 searchHash += searchCharacters[i + matchLength]; | |
908 searchHash -= searchCharacters[i]; | |
909 ++i; | |
910 } | |
911 return index + i; | |
912 } | |
913 | |
914 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) | |
915 { | |
916 // Check for null or empty string to match against | |
917 if (!matchString) | |
918 return notFound; | |
919 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)
); | |
920 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); | |
921 unsigned matchLength = matchStringLength; | |
922 if (!matchLength) | |
923 return min(index, length()); | |
924 | |
925 // Check index & matchLength are in range. | |
926 if (index > length()) | |
927 return notFound; | |
928 unsigned searchLength = length() - index; | |
929 if (matchLength > searchLength) | |
930 return notFound; | |
931 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
932 unsigned delta = searchLength - matchLength; | |
933 | |
934 const UChar* searchCharacters = characters() + index; | |
935 | |
936 unsigned i = 0; | |
937 // keep looping until we match | |
938 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { | |
939 if (i == delta) | |
940 return notFound; | |
941 ++i; | |
942 } | |
943 return index + i; | |
944 } | |
945 | |
946 template <typename SearchCharacterType, typename MatchCharacterType> | |
947 ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacter
s, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLen
gth, unsigned matchLength) | |
948 { | |
949 // Optimization: keep a running hash of the strings, | |
950 // only call equal() if the hashes match. | |
951 | |
952 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
953 unsigned delta = searchLength - matchLength; | |
954 | |
955 unsigned searchHash = 0; | |
956 unsigned matchHash = 0; | |
957 | |
958 for (unsigned i = 0; i < matchLength; ++i) { | |
959 searchHash += searchCharacters[i]; | |
960 matchHash += matchCharacters[i]; | |
961 } | |
962 | |
963 unsigned i = 0; | |
964 // keep looping until we match | |
965 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacte
rs, matchLength)) { | |
966 if (i == delta) | |
967 return notFound; | |
968 searchHash += searchCharacters[i + matchLength]; | |
969 searchHash -= searchCharacters[i]; | |
970 ++i; | |
971 } | |
972 return index + i; | |
973 } | |
974 | |
975 size_t StringImpl::find(StringImpl* matchString) | |
976 { | |
977 // Check for null string to match against | |
978 if (UNLIKELY(!matchString)) | |
979 return notFound; | |
980 unsigned matchLength = matchString->length(); | |
981 | |
982 // Optimization 1: fast case for strings of length 1. | |
983 if (matchLength == 1) { | |
984 if (is8Bit()) { | |
985 if (matchString->is8Bit()) | |
986 return WTF::find(characters8(), length(), matchString->character
s8()[0]); | |
987 return WTF::find(characters8(), length(), matchString->characters16(
)[0]); | |
988 } | |
989 if (matchString->is8Bit()) | |
990 return WTF::find(characters16(), length(), matchString->characters8(
)[0]); | |
991 return WTF::find(characters16(), length(), matchString->characters16()[0
]); | |
992 } | |
993 | |
994 // Check matchLength is in range. | |
995 if (matchLength > length()) | |
996 return notFound; | |
997 | |
998 // Check for empty string to match against | |
999 if (UNLIKELY(!matchLength)) | |
1000 return 0; | |
1001 | |
1002 if (is8Bit()) { | |
1003 if (matchString->is8Bit()) | |
1004 return findInner(characters8(), matchString->characters8(), 0, lengt
h(), matchLength); | |
1005 return findInner(characters8(), matchString->characters16(), 0, length()
, matchLength); | |
1006 } | |
1007 | |
1008 if (matchString->is8Bit()) | |
1009 return findInner(characters16(), matchString->characters8(), 0, length()
, matchLength); | |
1010 | |
1011 return findInner(characters16(), matchString->characters16(), 0, length(), m
atchLength); | |
1012 } | |
1013 | |
1014 size_t StringImpl::find(StringImpl* matchString, unsigned index) | |
1015 { | |
1016 // Check for null or empty string to match against | |
1017 if (UNLIKELY(!matchString)) | |
1018 return notFound; | |
1019 | |
1020 unsigned matchLength = matchString->length(); | |
1021 | |
1022 // Optimization 1: fast case for strings of length 1. | |
1023 if (matchLength == 1) { | |
1024 if (is8Bit()) | |
1025 return WTF::find(characters8(), length(), (*matchString)[0], index); | |
1026 return WTF::find(characters16(), length(), (*matchString)[0], index); | |
1027 } | |
1028 | |
1029 if (UNLIKELY(!matchLength)) | |
1030 return min(index, length()); | |
1031 | |
1032 // Check index & matchLength are in range. | |
1033 if (index > length()) | |
1034 return notFound; | |
1035 unsigned searchLength = length() - index; | |
1036 if (matchLength > searchLength) | |
1037 return notFound; | |
1038 | |
1039 if (is8Bit()) { | |
1040 if (matchString->is8Bit()) | |
1041 return findInner(characters8() + index, matchString->characters8(),
index, searchLength, matchLength); | |
1042 return findInner(characters8() + index, matchString->characters16(), ind
ex, searchLength, matchLength); | |
1043 } | |
1044 | |
1045 if (matchString->is8Bit()) | |
1046 return findInner(characters16() + index, matchString->characters8(), ind
ex, searchLength, matchLength); | |
1047 | |
1048 return findInner(characters16() + index, matchString->characters16(), index,
searchLength, matchLength); | |
1049 } | |
1050 | |
1051 template <typename SearchCharacterType, typename MatchCharacterType> | |
1052 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* sea
rchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsign
ed searchLength, unsigned matchLength) | |
1053 { | |
1054 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
1055 unsigned delta = searchLength - matchLength; | |
1056 | |
1057 unsigned i = 0; | |
1058 // keep looping until we match | |
1059 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength
)) { | |
1060 if (i == delta) | |
1061 return notFound; | |
1062 ++i; | |
1063 } | |
1064 return index + i; | |
1065 } | |
1066 | |
1067 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) | |
1068 { | |
1069 // Check for null or empty string to match against | |
1070 if (!matchString) | |
1071 return notFound; | |
1072 unsigned matchLength = matchString->length(); | |
1073 if (!matchLength) | |
1074 return min(index, length()); | |
1075 | |
1076 // Check index & matchLength are in range. | |
1077 if (index > length()) | |
1078 return notFound; | |
1079 unsigned searchLength = length() - index; | |
1080 if (matchLength > searchLength) | |
1081 return notFound; | |
1082 | |
1083 if (is8Bit()) { | |
1084 if (matchString->is8Bit()) | |
1085 return findIgnoringCaseInner(characters8() + index, matchString->cha
racters8(), index, searchLength, matchLength); | |
1086 return findIgnoringCaseInner(characters8() + index, matchString->charact
ers16(), index, searchLength, matchLength); | |
1087 } | |
1088 | |
1089 if (matchString->is8Bit()) | |
1090 return findIgnoringCaseInner(characters16() + index, matchString->charac
ters8(), index, searchLength, matchLength); | |
1091 | |
1092 return findIgnoringCaseInner(characters16() + index, matchString->characters
16(), index, searchLength, matchLength); | |
1093 } | |
1094 | |
1095 size_t StringImpl::findNextLineStart(unsigned index) | |
1096 { | |
1097 if (is8Bit()) | |
1098 return WTF::findNextLineStart(characters8(), m_length, index); | |
1099 return WTF::findNextLineStart(characters16(), m_length, index); | |
1100 } | |
1101 | |
1102 size_t StringImpl::reverseFind(UChar c, unsigned index) | |
1103 { | |
1104 if (is8Bit()) | |
1105 return WTF::reverseFind(characters8(), m_length, c, index); | |
1106 return WTF::reverseFind(characters16(), m_length, c, index); | |
1107 } | |
1108 | |
1109 template <typename SearchCharacterType, typename MatchCharacterType> | |
1110 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCh
aracters, const MatchCharacterType* matchCharacters, unsigned index, unsigned le
ngth, unsigned matchLength) | |
1111 { | |
1112 // Optimization: keep a running hash of the strings, | |
1113 // only call equal if the hashes match. | |
1114 | |
1115 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
1116 unsigned delta = min(index, length - matchLength); | |
1117 | |
1118 unsigned searchHash = 0; | |
1119 unsigned matchHash = 0; | |
1120 for (unsigned i = 0; i < matchLength; ++i) { | |
1121 searchHash += searchCharacters[delta + i]; | |
1122 matchHash += matchCharacters[i]; | |
1123 } | |
1124 | |
1125 // keep looping until we match | |
1126 while (searchHash != matchHash || !equal(searchCharacters + delta, matchChar
acters, matchLength)) { | |
1127 if (!delta) | |
1128 return notFound; | |
1129 --delta; | |
1130 searchHash -= searchCharacters[delta + matchLength]; | |
1131 searchHash += searchCharacters[delta]; | |
1132 } | |
1133 return delta; | |
1134 } | |
1135 | |
1136 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) | |
1137 { | |
1138 // Check for null or empty string to match against | |
1139 if (!matchString) | |
1140 return notFound; | |
1141 unsigned matchLength = matchString->length(); | |
1142 unsigned ourLength = length(); | |
1143 if (!matchLength) | |
1144 return min(index, ourLength); | |
1145 | |
1146 // Optimization 1: fast case for strings of length 1. | |
1147 if (matchLength == 1) { | |
1148 if (is8Bit()) | |
1149 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0],
index); | |
1150 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], in
dex); | |
1151 } | |
1152 | |
1153 // Check index & matchLength are in range. | |
1154 if (matchLength > ourLength) | |
1155 return notFound; | |
1156 | |
1157 if (is8Bit()) { | |
1158 if (matchString->is8Bit()) | |
1159 return reverseFindInner(characters8(), matchString->characters8(), i
ndex, ourLength, matchLength); | |
1160 return reverseFindInner(characters8(), matchString->characters16(), inde
x, ourLength, matchLength); | |
1161 } | |
1162 | |
1163 if (matchString->is8Bit()) | |
1164 return reverseFindInner(characters16(), matchString->characters8(), inde
x, ourLength, matchLength); | |
1165 | |
1166 return reverseFindInner(characters16(), matchString->characters16(), index,
ourLength, matchLength); | |
1167 } | |
1168 | |
1169 template <typename SearchCharacterType, typename MatchCharacterType> | |
1170 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterTy
pe* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index,
unsigned length, unsigned matchLength) | |
1171 { | |
1172 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
1173 unsigned delta = min(index, length - matchLength); | |
1174 | |
1175 // keep looping until we match | |
1176 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLe
ngth)) { | |
1177 if (!delta) | |
1178 return notFound; | |
1179 --delta; | |
1180 } | |
1181 return delta; | |
1182 } | |
1183 | |
1184 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned ind
ex) | |
1185 { | |
1186 // Check for null or empty string to match against | |
1187 if (!matchString) | |
1188 return notFound; | |
1189 unsigned matchLength = matchString->length(); | |
1190 unsigned ourLength = length(); | |
1191 if (!matchLength) | |
1192 return min(index, ourLength); | |
1193 | |
1194 // Check index & matchLength are in range. | |
1195 if (matchLength > ourLength) | |
1196 return notFound; | |
1197 | |
1198 if (is8Bit()) { | |
1199 if (matchString->is8Bit()) | |
1200 return reverseFindIgnoringCaseInner(characters8(), matchString->char
acters8(), index, ourLength, matchLength); | |
1201 return reverseFindIgnoringCaseInner(characters8(), matchString->characte
rs16(), index, ourLength, matchLength); | |
1202 } | |
1203 | |
1204 if (matchString->is8Bit()) | |
1205 return reverseFindIgnoringCaseInner(characters16(), matchString->charact
ers8(), index, ourLength, matchLength); | |
1206 | |
1207 return reverseFindIgnoringCaseInner(characters16(), matchString->characters1
6(), index, ourLength, matchLength); | |
1208 } | |
1209 | |
1210 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned star
tOffset, const char* matchString, unsigned matchLength, bool caseSensitive) | |
1211 { | |
1212 ASSERT(stringImpl); | |
1213 ASSERT(matchLength <= stringImpl->length()); | |
1214 ASSERT(startOffset + matchLength <= stringImpl->length()); | |
1215 | |
1216 if (caseSensitive) { | |
1217 if (stringImpl->is8Bit()) | |
1218 return equal(stringImpl->characters8() + startOffset, reinterpret_ca
st<const LChar*>(matchString), matchLength); | |
1219 return equal(stringImpl->characters16() + startOffset, reinterpret_cast<
const LChar*>(matchString), matchLength); | |
1220 } | |
1221 if (stringImpl->is8Bit()) | |
1222 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinte
rpret_cast<const LChar*>(matchString), matchLength); | |
1223 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpr
et_cast<const LChar*>(matchString), matchLength); | |
1224 } | |
1225 | |
1226 bool StringImpl::startsWith(UChar character) const | |
1227 { | |
1228 return m_length && (*this)[0] == character; | |
1229 } | |
1230 | |
1231 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool
caseSensitive) const | |
1232 { | |
1233 ASSERT(matchLength); | |
1234 if (matchLength > length()) | |
1235 return false; | |
1236 return equalInner(this, 0, matchString, matchLength, caseSensitive); | |
1237 } | |
1238 | |
1239 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive) | |
1240 { | |
1241 ASSERT(matchString); | |
1242 if (m_length >= matchString->m_length) { | |
1243 unsigned start = m_length - matchString->m_length; | |
1244 return (caseSensitive ? find(matchString, start) : findIgnoringCase(matc
hString, start)) == start; | |
1245 } | |
1246 return false; | |
1247 } | |
1248 | |
1249 bool StringImpl::endsWith(UChar character) const | |
1250 { | |
1251 return m_length && (*this)[m_length - 1] == character; | |
1252 } | |
1253 | |
1254 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool ca
seSensitive) const | |
1255 { | |
1256 ASSERT(matchLength); | |
1257 if (matchLength > length()) | |
1258 return false; | |
1259 unsigned startOffset = length() - matchLength; | |
1260 return equalInner(this, startOffset, matchString, matchLength, caseSensitive
); | |
1261 } | |
1262 | |
1263 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) | |
1264 { | |
1265 if (oldC == newC) | |
1266 return this; | |
1267 unsigned i; | |
1268 for (i = 0; i != m_length; ++i) { | |
1269 UChar c = is8Bit() ? m_data8[i] : m_data16[i]; | |
1270 if (c == oldC) | |
1271 break; | |
1272 } | |
1273 if (i == m_length) | |
1274 return this; | |
1275 | |
1276 if (is8Bit()) { | |
1277 if (oldC > 0xff) | |
1278 // Looking for a 16 bit char in an 8 bit string, we're done. | |
1279 return this; | |
1280 | |
1281 if (newC <= 0xff) { | |
1282 LChar* data; | |
1283 LChar oldChar = static_cast<LChar>(oldC); | |
1284 LChar newChar = static_cast<LChar>(newC); | |
1285 | |
1286 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
1287 | |
1288 for (i = 0; i != m_length; ++i) { | |
1289 LChar ch = m_data8[i]; | |
1290 if (ch == oldChar) | |
1291 ch = newChar; | |
1292 data[i] = ch; | |
1293 } | |
1294 return newImpl.release(); | |
1295 } | |
1296 | |
1297 // There is the possibility we need to up convert from 8 to 16 bit, | |
1298 // create a 16 bit string for the result. | |
1299 UChar* data; | |
1300 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
1301 | |
1302 for (i = 0; i != m_length; ++i) { | |
1303 UChar ch = m_data8[i]; | |
1304 if (ch == oldC) | |
1305 ch = newC; | |
1306 data[i] = ch; | |
1307 } | |
1308 | |
1309 return newImpl.release(); | |
1310 } | |
1311 | |
1312 UChar* data; | |
1313 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
1314 | |
1315 for (i = 0; i != m_length; ++i) { | |
1316 UChar ch = m_data16[i]; | |
1317 if (ch == oldC) | |
1318 ch = newC; | |
1319 data[i] = ch; | |
1320 } | |
1321 return newImpl.release(); | |
1322 } | |
1323 | |
1324 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToR
eplace, StringImpl* str) | |
1325 { | |
1326 position = min(position, length()); | |
1327 lengthToReplace = min(lengthToReplace, length() - position); | |
1328 unsigned lengthToInsert = str ? str->length() : 0; | |
1329 if (!lengthToReplace && !lengthToInsert) | |
1330 return this; | |
1331 | |
1332 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max
() - lengthToInsert)); | |
1333 | |
1334 if (is8Bit() && (!str || str->is8Bit())) { | |
1335 LChar* data; | |
1336 RefPtr<StringImpl> newImpl = | |
1337 createUninitialized(length() - lengthToReplace + lengthToInsert, data); | |
1338 memcpy(data, m_data8, position * sizeof(LChar)); | |
1339 if (str) | |
1340 memcpy(data + position, str->m_data8, lengthToInsert * sizeof(LChar)
); | |
1341 memcpy(data + position + lengthToInsert, m_data8 + position + lengthToRe
place, | |
1342 (length() - position - lengthToReplace) * sizeof(LChar)); | |
1343 return newImpl.release(); | |
1344 } | |
1345 UChar* data; | |
1346 RefPtr<StringImpl> newImpl = | |
1347 createUninitialized(length() - lengthToReplace + lengthToInsert, data); | |
1348 if (is8Bit()) | |
1349 for (unsigned i = 0; i < position; ++i) | |
1350 data[i] = m_data8[i]; | |
1351 else | |
1352 memcpy(data, m_data16, position * sizeof(UChar)); | |
1353 if (str) { | |
1354 if (str->is8Bit()) | |
1355 for (unsigned i = 0; i < lengthToInsert; ++i) | |
1356 data[i + position] = str->m_data8[i]; | |
1357 else | |
1358 memcpy(data + position, str->m_data16, lengthToInsert * sizeof(UChar
)); | |
1359 } | |
1360 if (is8Bit()) { | |
1361 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i) | |
1362 data[i + position + lengthToInsert] = m_data8[i + position + lengthT
oReplace]; | |
1363 } else { | |
1364 memcpy(data + position + lengthToInsert, characters() + position + lengt
hToReplace, | |
1365 (length() - position - lengthToReplace) * sizeof(UChar)); | |
1366 } | |
1367 return newImpl.release(); | |
1368 } | |
1369 | |
1370 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen
t) | |
1371 { | |
1372 if (!replacement) | |
1373 return this; | |
1374 | |
1375 if (replacement->is8Bit()) | |
1376 return replace(pattern, replacement->m_data8, replacement->length()); | |
1377 | |
1378 return replace(pattern, replacement->m_data16, replacement->length()); | |
1379 } | |
1380 | |
1381 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replaceme
nt, unsigned repStrLength) | |
1382 { | |
1383 ASSERT(replacement); | |
1384 | |
1385 size_t srcSegmentStart = 0; | |
1386 unsigned matchCount = 0; | |
1387 | |
1388 // Count the matches. | |
1389 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { | |
1390 ++matchCount; | |
1391 ++srcSegmentStart; | |
1392 } | |
1393 | |
1394 // If we have 0 matches then we don't have to do any more work. | |
1395 if (!matchCount) | |
1396 return this; | |
1397 | |
1398 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max(
) / repStrLength); | |
1399 | |
1400 unsigned replaceSize = matchCount * repStrLength; | |
1401 unsigned newSize = m_length - matchCount; | |
1402 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); | |
1403 | |
1404 newSize += replaceSize; | |
1405 | |
1406 // Construct the new data. | |
1407 size_t srcSegmentEnd; | |
1408 unsigned srcSegmentLength; | |
1409 srcSegmentStart = 0; | |
1410 unsigned dstOffset = 0; | |
1411 | |
1412 if (is8Bit()) { | |
1413 LChar* data; | |
1414 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
1415 | |
1416 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { | |
1417 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
1418 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength
* sizeof(LChar)); | |
1419 dstOffset += srcSegmentLength; | |
1420 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar)); | |
1421 dstOffset += repStrLength; | |
1422 srcSegmentStart = srcSegmentEnd + 1; | |
1423 } | |
1424 | |
1425 srcSegmentLength = m_length - srcSegmentStart; | |
1426 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * s
izeof(LChar)); | |
1427 | |
1428 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
1429 | |
1430 return newImpl.release(); | |
1431 } | |
1432 | |
1433 UChar* data; | |
1434 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
1435 | |
1436 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { | |
1437 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
1438 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength *
sizeof(UChar)); | |
1439 | |
1440 dstOffset += srcSegmentLength; | |
1441 for (unsigned i = 0; i < repStrLength; ++i) | |
1442 data[i + dstOffset] = replacement[i]; | |
1443 | |
1444 dstOffset += repStrLength; | |
1445 srcSegmentStart = srcSegmentEnd + 1; | |
1446 } | |
1447 | |
1448 srcSegmentLength = m_length - srcSegmentStart; | |
1449 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * size
of(UChar)); | |
1450 | |
1451 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
1452 | |
1453 return newImpl.release(); | |
1454 } | |
1455 | |
1456 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replaceme
nt, unsigned repStrLength) | |
1457 { | |
1458 ASSERT(replacement); | |
1459 | |
1460 size_t srcSegmentStart = 0; | |
1461 unsigned matchCount = 0; | |
1462 | |
1463 // Count the matches. | |
1464 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { | |
1465 ++matchCount; | |
1466 ++srcSegmentStart; | |
1467 } | |
1468 | |
1469 // If we have 0 matches then we don't have to do any more work. | |
1470 if (!matchCount) | |
1471 return this; | |
1472 | |
1473 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max(
) / repStrLength); | |
1474 | |
1475 unsigned replaceSize = matchCount * repStrLength; | |
1476 unsigned newSize = m_length - matchCount; | |
1477 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); | |
1478 | |
1479 newSize += replaceSize; | |
1480 | |
1481 // Construct the new data. | |
1482 size_t srcSegmentEnd; | |
1483 unsigned srcSegmentLength; | |
1484 srcSegmentStart = 0; | |
1485 unsigned dstOffset = 0; | |
1486 | |
1487 if (is8Bit()) { | |
1488 UChar* data; | |
1489 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
1490 | |
1491 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { | |
1492 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
1493 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
1494 data[i + dstOffset] = m_data8[i + srcSegmentStart]; | |
1495 | |
1496 dstOffset += srcSegmentLength; | |
1497 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); | |
1498 | |
1499 dstOffset += repStrLength; | |
1500 srcSegmentStart = srcSegmentEnd + 1; | |
1501 } | |
1502 | |
1503 srcSegmentLength = m_length - srcSegmentStart; | |
1504 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
1505 data[i + dstOffset] = m_data8[i + srcSegmentStart]; | |
1506 | |
1507 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
1508 | |
1509 return newImpl.release(); | |
1510 } | |
1511 | |
1512 UChar* data; | |
1513 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
1514 | |
1515 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { | |
1516 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
1517 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength *
sizeof(UChar)); | |
1518 | |
1519 dstOffset += srcSegmentLength; | |
1520 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); | |
1521 | |
1522 dstOffset += repStrLength; | |
1523 srcSegmentStart = srcSegmentEnd + 1; | |
1524 } | |
1525 | |
1526 srcSegmentLength = m_length - srcSegmentStart; | |
1527 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * size
of(UChar)); | |
1528 | |
1529 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
1530 | |
1531 return newImpl.release(); | |
1532 } | |
1533 | |
1534 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl
acement) | |
1535 { | |
1536 if (!pattern || !replacement) | |
1537 return this; | |
1538 | |
1539 unsigned patternLength = pattern->length(); | |
1540 if (!patternLength) | |
1541 return this; | |
1542 | |
1543 unsigned repStrLength = replacement->length(); | |
1544 size_t srcSegmentStart = 0; | |
1545 unsigned matchCount = 0; | |
1546 | |
1547 // Count the matches. | |
1548 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { | |
1549 ++matchCount; | |
1550 srcSegmentStart += patternLength; | |
1551 } | |
1552 | |
1553 // If we have 0 matches, we don't have to do any more work | |
1554 if (!matchCount) | |
1555 return this; | |
1556 | |
1557 unsigned newSize = m_length - matchCount * patternLength; | |
1558 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max(
) / repStrLength); | |
1559 | |
1560 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * re
pStrLength)); | |
1561 | |
1562 newSize += matchCount * repStrLength; | |
1563 | |
1564 | |
1565 // Construct the new data | |
1566 size_t srcSegmentEnd; | |
1567 unsigned srcSegmentLength; | |
1568 srcSegmentStart = 0; | |
1569 unsigned dstOffset = 0; | |
1570 bool srcIs8Bit = is8Bit(); | |
1571 bool replacementIs8Bit = replacement->is8Bit(); | |
1572 | |
1573 // There are 4 cases: | |
1574 // 1. This and replacement are both 8 bit. | |
1575 // 2. This and replacement are both 16 bit. | |
1576 // 3. This is 8 bit and replacement is 16 bit. | |
1577 // 4. This is 16 bit and replacement is 8 bit. | |
1578 if (srcIs8Bit && replacementIs8Bit) { | |
1579 // Case 1 | |
1580 LChar* data; | |
1581 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
1582 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { | |
1583 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
1584 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength
* sizeof(LChar)); | |
1585 dstOffset += srcSegmentLength; | |
1586 memcpy(data + dstOffset, replacement->m_data8, repStrLength * sizeof
(LChar)); | |
1587 dstOffset += repStrLength; | |
1588 srcSegmentStart = srcSegmentEnd + patternLength; | |
1589 } | |
1590 | |
1591 srcSegmentLength = m_length - srcSegmentStart; | |
1592 memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * s
izeof(LChar)); | |
1593 | |
1594 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
1595 | |
1596 return newImpl.release(); | |
1597 } | |
1598 | |
1599 UChar* data; | |
1600 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
1601 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { | |
1602 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
1603 if (srcIs8Bit) { | |
1604 // Case 3. | |
1605 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
1606 data[i + dstOffset] = m_data8[i + srcSegmentStart]; | |
1607 } else { | |
1608 // Case 2 & 4. | |
1609 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLengt
h * sizeof(UChar)); | |
1610 } | |
1611 dstOffset += srcSegmentLength; | |
1612 if (replacementIs8Bit) { | |
1613 // Cases 2 & 3. | |
1614 for (unsigned i = 0; i < repStrLength; ++i) | |
1615 data[i + dstOffset] = replacement->m_data8[i]; | |
1616 } else { | |
1617 // Case 4 | |
1618 memcpy(data + dstOffset, replacement->m_data16, repStrLength * sizeo
f(UChar)); | |
1619 } | |
1620 dstOffset += repStrLength; | |
1621 srcSegmentStart = srcSegmentEnd + patternLength; | |
1622 } | |
1623 | |
1624 srcSegmentLength = m_length - srcSegmentStart; | |
1625 if (srcIs8Bit) { | |
1626 // Case 3. | |
1627 for (unsigned i = 0; i < srcSegmentLength; ++i) | |
1628 data[i + dstOffset] = m_data8[i + srcSegmentStart]; | |
1629 } else { | |
1630 // Cases 2 & 4. | |
1631 memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength *
sizeof(UChar)); | |
1632 } | |
1633 | |
1634 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
1635 | |
1636 return newImpl.release(); | |
1637 } | |
1638 | |
1639 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl*
b) | |
1640 { | |
1641 unsigned aLength = a->length(); | |
1642 unsigned bLength = b->length(); | |
1643 if (aLength != bLength) | |
1644 return false; | |
1645 | |
1646 if (a->is8Bit()) { | |
1647 if (b->is8Bit()) | |
1648 return equal(a->characters8(), b->characters8(), aLength); | |
1649 | |
1650 return equal(a->characters8(), b->characters16(), aLength); | |
1651 } | |
1652 | |
1653 if (b->is8Bit()) | |
1654 return equal(a->characters16(), b->characters8(), aLength); | |
1655 | |
1656 return equal(a->characters16(), b->characters16(), aLength); | |
1657 } | |
1658 | |
1659 bool equal(const StringImpl* a, const StringImpl* b) | |
1660 { | |
1661 if (a == b) | |
1662 return true; | |
1663 if (!a || !b) | |
1664 return false; | |
1665 | |
1666 return stringImplContentEqual(a, b); | |
1667 } | |
1668 | |
1669 bool equal(const StringImpl* a, const LChar* b, unsigned length) | |
1670 { | |
1671 if (!a) | |
1672 return !b; | |
1673 if (!b) | |
1674 return !a; | |
1675 | |
1676 if (length != a->length()) | |
1677 return false; | |
1678 | |
1679 if (a->is8Bit()) | |
1680 return equal(a->characters8(), b, length); | |
1681 return equal(a->characters16(), b, length); | |
1682 } | |
1683 | |
1684 bool equal(const StringImpl* a, const LChar* b) | |
1685 { | |
1686 if (!a) | |
1687 return !b; | |
1688 if (!b) | |
1689 return !a; | |
1690 | |
1691 unsigned length = a->length(); | |
1692 | |
1693 if (a->is8Bit()) { | |
1694 const LChar* aPtr = a->characters8(); | |
1695 for (unsigned i = 0; i != length; ++i) { | |
1696 LChar bc = b[i]; | |
1697 LChar ac = aPtr[i]; | |
1698 if (!bc) | |
1699 return false; | |
1700 if (ac != bc) | |
1701 return false; | |
1702 } | |
1703 | |
1704 return !b[length]; | |
1705 } | |
1706 | |
1707 const UChar* aPtr = a->characters16(); | |
1708 for (unsigned i = 0; i != length; ++i) { | |
1709 LChar bc = b[i]; | |
1710 if (!bc) | |
1711 return false; | |
1712 if (aPtr[i] != bc) | |
1713 return false; | |
1714 } | |
1715 | |
1716 return !b[length]; | |
1717 } | |
1718 | |
1719 bool equal(const StringImpl* a, const UChar* b, unsigned length) | |
1720 { | |
1721 if (!a) | |
1722 return !b; | |
1723 if (!b) | |
1724 return false; | |
1725 | |
1726 if (a->length() != length) | |
1727 return false; | |
1728 if (a->is8Bit()) | |
1729 return equal(a->characters8(), b, length); | |
1730 return equal(a->characters16(), b, length); | |
1731 } | |
1732 | |
1733 bool equalNonNull(const StringImpl* a, const StringImpl* b) | |
1734 { | |
1735 ASSERT(a && b); | |
1736 if (a == b) | |
1737 return true; | |
1738 | |
1739 return stringImplContentEqual(a, b); | |
1740 } | |
1741 | |
1742 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b) | |
1743 { | |
1744 if (a == b) | |
1745 return true; | |
1746 if (!a || !b) | |
1747 return false; | |
1748 | |
1749 return CaseFoldingHash::equal(a, b); | |
1750 } | |
1751 | |
1752 bool equalIgnoringCase(const StringImpl* a, const LChar* b) | |
1753 { | |
1754 if (!a) | |
1755 return !b; | |
1756 if (!b) | |
1757 return !a; | |
1758 | |
1759 unsigned length = a->length(); | |
1760 | |
1761 // Do a faster loop for the case where all the characters are ASCII. | |
1762 UChar ored = 0; | |
1763 bool equal = true; | |
1764 if (a->is8Bit()) { | |
1765 const LChar* as = a->characters8(); | |
1766 for (unsigned i = 0; i != length; ++i) { | |
1767 LChar bc = b[i]; | |
1768 if (!bc) | |
1769 return false; | |
1770 UChar ac = as[i]; | |
1771 ored |= ac; | |
1772 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); | |
1773 } | |
1774 | |
1775 // Do a slower implementation for cases that include non-ASCII character
s. | |
1776 if (ored & ~0x7F) { | |
1777 equal = true; | |
1778 for (unsigned i = 0; i != length; ++i) | |
1779 equal = equal && (foldCase(as[i]) == foldCase(b[i])); | |
1780 } | |
1781 | |
1782 return equal && !b[length]; | |
1783 } | |
1784 | |
1785 const UChar* as = a->characters16(); | |
1786 for (unsigned i = 0; i != length; ++i) { | |
1787 LChar bc = b[i]; | |
1788 if (!bc) | |
1789 return false; | |
1790 UChar ac = as[i]; | |
1791 ored |= ac; | |
1792 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); | |
1793 } | |
1794 | |
1795 // Do a slower implementation for cases that include non-ASCII characters. | |
1796 if (ored & ~0x7F) { | |
1797 equal = true; | |
1798 for (unsigned i = 0; i != length; ++i) { | |
1799 equal = equal && (foldCase(as[i]) == foldCase(b[i])); | |
1800 } | |
1801 } | |
1802 | |
1803 return equal && !b[length]; | |
1804 } | |
1805 | |
1806 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) | |
1807 { | |
1808 ASSERT(a && b); | |
1809 if (a == b) | |
1810 return true; | |
1811 | |
1812 unsigned length = a->length(); | |
1813 if (length != b->length()) | |
1814 return false; | |
1815 | |
1816 if (a->is8Bit()) { | |
1817 if (b->is8Bit()) | |
1818 return equalIgnoringCase(a->characters8(), b->characters8(), length)
; | |
1819 | |
1820 return equalIgnoringCase(b->characters16(), a->characters8(), length); | |
1821 } | |
1822 | |
1823 if (b->is8Bit()) | |
1824 return equalIgnoringCase(a->characters16(), b->characters8(), length); | |
1825 | |
1826 return equalIgnoringCase(a->characters16(), b->characters16(), length); | |
1827 } | |
1828 | |
1829 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) | |
1830 { | |
1831 if (!a && b && !b->length()) | |
1832 return true; | |
1833 if (!b && a && !a->length()) | |
1834 return true; | |
1835 return equal(a, b); | |
1836 } | |
1837 | |
1838 WTF::Unicode::Direction StringImpl::defaultWritingDirection(bool* hasStrongDirec
tionality) | |
1839 { | |
1840 for (unsigned i = 0; i < m_length; ++i) { | |
1841 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(is8Bit()
? m_data8[i] : m_data16[i]); | |
1842 if (charDirection == WTF::Unicode::LeftToRight) { | |
1843 if (hasStrongDirectionality) | |
1844 *hasStrongDirectionality = true; | |
1845 return WTF::Unicode::LeftToRight; | |
1846 } | |
1847 if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::
Unicode::RightToLeftArabic) { | |
1848 if (hasStrongDirectionality) | |
1849 *hasStrongDirectionality = true; | |
1850 return WTF::Unicode::RightToLeft; | |
1851 } | |
1852 } | |
1853 if (hasStrongDirectionality) | |
1854 *hasStrongDirectionality = false; | |
1855 return WTF::Unicode::LeftToRight; | |
1856 } | |
1857 | |
1858 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer<LChar>& buffer) | |
1859 { | |
1860 unsigned length = buffer.length(); | |
1861 if (!length) | |
1862 return empty(); | |
1863 return adoptRef(new StringImpl(buffer.release(), length)); | |
1864 } | |
1865 | |
1866 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer<UChar>& buffer) | |
1867 { | |
1868 unsigned length = buffer.length(); | |
1869 if (!length) | |
1870 return empty(); | |
1871 return adoptRef(new StringImpl(buffer.release(), length)); | |
1872 } | |
1873 | |
1874 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const Stri
ngImpl& string) | |
1875 { | |
1876 // Use createUninitialized instead of 'new StringImpl' so that the string an
d its buffer | |
1877 // get allocated in a single memory block. | |
1878 unsigned length = string.m_length; | |
1879 RELEASE_ASSERT(length < numeric_limits<unsigned>::max()); | |
1880 RefPtr<StringImpl> terminatedString; | |
1881 if (string.is8Bit()) { | |
1882 LChar* data; | |
1883 terminatedString = createUninitialized(length + 1, data); | |
1884 memcpy(data, string.m_data8, length * sizeof(LChar)); | |
1885 data[length] = 0; | |
1886 } else { | |
1887 UChar* data; | |
1888 terminatedString = createUninitialized(length + 1, data); | |
1889 memcpy(data, string.m_data16, length * sizeof(UChar)); | |
1890 data[length] = 0; | |
1891 } | |
1892 --(terminatedString->m_length); | |
1893 terminatedString->m_hashAndFlags = (string.m_hashAndFlags & (~s_flagMask | s
_hashFlag8BitBuffer)) | s_hashFlagHasTerminatingNullCharacter; | |
1894 return terminatedString.release(); | |
1895 } | |
1896 | |
1897 size_t StringImpl::sizeInBytes() const | |
1898 { | |
1899 // FIXME: support substrings | |
1900 size_t size = length(); | |
1901 if (is8Bit()) { | |
1902 if (has16BitShadow()) { | |
1903 size += 2 * size; | |
1904 if (hasTerminatingNullCharacter()) | |
1905 size += 2; | |
1906 } | |
1907 } else | |
1908 size *= 2; | |
1909 return size + sizeof(*this); | |
1910 } | |
1911 | |
1912 } // namespace WTF | |
OLD | NEW |