| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) |
| 3 * (C) 1999 Antti Koivisto (koivisto@kde.org) | 3 * (C) 1999 Antti Koivisto (koivisto@kde.org) |
| 4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) | 4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) |
| 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All r
ights reserved. | 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All r
ights reserved. |
| 6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) | 6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) |
| 7 * | 7 * |
| 8 * This library is free software; you can redistribute it and/or | 8 * This library is free software; you can redistribute it and/or |
| 9 * modify it under the terms of the GNU Library General Public | 9 * modify it under the terms of the GNU Library General Public |
| 10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 52 using namespace std; | 52 using namespace std; |
| 53 | 53 |
| 54 namespace WTF { | 54 namespace WTF { |
| 55 | 55 |
| 56 using namespace Unicode; | 56 using namespace Unicode; |
| 57 | 57 |
| 58 static_assert(sizeof(StringImpl) == 3 * sizeof(int), "StringImpl should stay sma
ll"); | 58 static_assert(sizeof(StringImpl) == 3 * sizeof(int), "StringImpl should stay sma
ll"); |
| 59 | 59 |
| 60 #ifdef STRING_STATS | 60 #ifdef STRING_STATS |
| 61 | 61 |
| 62 static Mutex& statsMutex() | 62 static Mutex& statsMutex() { |
| 63 { | 63 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); |
| 64 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); | 64 return mutex; |
| 65 return mutex; | 65 } |
| 66 } | 66 |
| 67 | 67 static HashSet<void*>& liveStrings() { |
| 68 static HashSet<void*>& liveStrings() | 68 // Notice that we can't use HashSet<StringImpl*> because then HashSet would de
dup identical strings. |
| 69 { | 69 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ()); |
| 70 // Notice that we can't use HashSet<StringImpl*> because then HashSet would
dedup identical strings. | 70 return strings; |
| 71 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ()); | 71 } |
| 72 return strings; | 72 |
| 73 } | 73 void addStringForStats(StringImpl* string) { |
| 74 | 74 MutexLocker locker(statsMutex()); |
| 75 void addStringForStats(StringImpl* string) | 75 liveStrings().add(string); |
| 76 { | 76 } |
| 77 MutexLocker locker(statsMutex()); | 77 |
| 78 liveStrings().add(string); | 78 void removeStringForStats(StringImpl* string) { |
| 79 } | 79 MutexLocker locker(statsMutex()); |
| 80 | 80 liveStrings().remove(string); |
| 81 void removeStringForStats(StringImpl* string) | 81 } |
| 82 { | 82 |
| 83 MutexLocker locker(statsMutex()); | 83 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) { |
| 84 liveStrings().remove(string); | 84 const unsigned kMaxSnippetLength = 64; |
| 85 } | 85 snippet.clear(); |
| 86 | 86 |
| 87 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) | 87 size_t expectedLength = std::min(string->length(), kMaxSnippetLength); |
| 88 { | 88 if (expectedLength == kMaxSnippetLength) |
| 89 const unsigned kMaxSnippetLength = 64; | 89 expectedLength += 3; // For the "...". |
| 90 snippet.clear(); | 90 ++expectedLength; // For the terminating '\0'. |
| 91 | 91 snippet.reserveCapacity(expectedLength); |
| 92 size_t expectedLength = std::min(string->length(), kMaxSnippetLength); | 92 |
| 93 if (expectedLength == kMaxSnippetLength) | 93 size_t i; |
| 94 expectedLength += 3; // For the "...". | 94 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) { |
| 95 ++expectedLength; // For the terminating '\0'. | 95 UChar c = (*string)[i]; |
| 96 snippet.reserveCapacity(expectedLength); | 96 if (isASCIIPrintable(c)) |
| 97 | 97 snippet.append(c); |
| 98 size_t i; | 98 else |
| 99 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) { | 99 snippet.append('?'); |
| 100 UChar c = (*string)[i]; | 100 } |
| 101 if (isASCIIPrintable(c)) | 101 if (i < string->length()) { |
| 102 snippet.append(c); | 102 snippet.append('.'); |
| 103 else | 103 snippet.append('.'); |
| 104 snippet.append('?'); | 104 snippet.append('.'); |
| 105 } | 105 } |
| 106 if (i < string->length()) { | 106 snippet.append('\0'); |
| 107 snippet.append('.'); | 107 } |
| 108 snippet.append('.'); | 108 |
| 109 snippet.append('.'); | 109 static bool isUnnecessarilyWide(const StringImpl* string) { |
| 110 } | 110 if (string->is8Bit()) |
| 111 snippet.append('\0'); | 111 return false; |
| 112 } | 112 UChar c = 0; |
| 113 | 113 for (unsigned i = 0; i < string->length(); ++i) |
| 114 static bool isUnnecessarilyWide(const StringImpl* string) | 114 c |= (*string)[i] >> 8; |
| 115 { | 115 return !c; |
| 116 if (string->is8Bit()) | |
| 117 return false; | |
| 118 UChar c = 0; | |
| 119 for (unsigned i = 0; i < string->length(); ++i) | |
| 120 c |= (*string)[i] >> 8; | |
| 121 return !c; | |
| 122 } | 116 } |
| 123 | 117 |
| 124 class PerStringStats : public RefCounted<PerStringStats> { | 118 class PerStringStats : public RefCounted<PerStringStats> { |
| 125 public: | 119 public: |
| 126 static PassRefPtr<PerStringStats> create() | 120 static PassRefPtr<PerStringStats> create() { |
| 127 { | 121 return adoptRef(new PerStringStats); |
| 128 return adoptRef(new PerStringStats); | 122 } |
| 129 } | 123 |
| 130 | 124 void add(const StringImpl* string) { |
| 131 void add(const StringImpl* string) | 125 ++m_numberOfCopies; |
| 132 { | 126 if (!m_length) { |
| 133 ++m_numberOfCopies; | 127 m_length = string->length(); |
| 134 if (!m_length) { | 128 fillWithSnippet(string, m_snippet); |
| 135 m_length = string->length(); | 129 } |
| 136 fillWithSnippet(string, m_snippet); | 130 if (string->isAtomic()) |
| 137 } | 131 ++m_numberOfAtomicCopies; |
| 138 if (string->isAtomic()) | 132 if (isUnnecessarilyWide(string)) |
| 139 ++m_numberOfAtomicCopies; | 133 m_unnecessarilyWide = true; |
| 140 if (isUnnecessarilyWide(string)) | 134 } |
| 141 m_unnecessarilyWide = true; | 135 |
| 142 } | 136 size_t totalCharacters() const { |
| 143 | 137 return m_numberOfCopies * m_length; |
| 144 size_t totalCharacters() const | 138 } |
| 145 { | 139 |
| 146 return m_numberOfCopies * m_length; | 140 void print() { |
| 147 } | 141 const char* status = "ok"; |
| 148 | 142 if (m_unnecessarilyWide) |
| 149 void print() | 143 status = "16"; |
| 150 { | 144 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_l
ength, m_snippet.data()); |
| 151 const char* status = "ok"; | 145 } |
| 152 if (m_unnecessarilyWide) | 146 |
| 153 status = "16"; | 147 bool m_unnecessarilyWide; |
| 154 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status,
m_length, m_snippet.data()); | 148 unsigned m_numberOfCopies; |
| 155 } | 149 unsigned m_length; |
| 156 | 150 unsigned m_numberOfAtomicCopies; |
| 157 bool m_unnecessarilyWide; | 151 Vector<char> m_snippet; |
| 158 unsigned m_numberOfCopies; | 152 |
| 159 unsigned m_length; | 153 private: |
| 160 unsigned m_numberOfAtomicCopies; | 154 PerStringStats() |
| 161 Vector<char> m_snippet; | 155 : m_unnecessarilyWide(false), m_numberOfCopies(0), m_length(0), m_numberOf
AtomicCopies(0) { |
| 162 | 156 } |
| 163 private: | |
| 164 PerStringStats() | |
| 165 : m_unnecessarilyWide(false) | |
| 166 , m_numberOfCopies(0) | |
| 167 , m_length(0) | |
| 168 , m_numberOfAtomicCopies(0) | |
| 169 { | |
| 170 } | |
| 171 }; | 157 }; |
| 172 | 158 |
| 173 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b) | 159 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b)
{ |
| 174 { | 160 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide) |
| 175 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide) | 161 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide; |
| 176 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide; | 162 if (a->totalCharacters() != b->totalCharacters()) |
| 177 if (a->totalCharacters() != b->totalCharacters()) | 163 return a->totalCharacters() < b->totalCharacters(); |
| 178 return a->totalCharacters() < b->totalCharacters(); | 164 if (a->m_numberOfCopies != b->m_numberOfCopies) |
| 179 if (a->m_numberOfCopies != b->m_numberOfCopies) | 165 return a->m_numberOfCopies < b->m_numberOfCopies; |
| 180 return a->m_numberOfCopies < b->m_numberOfCopies; | 166 if (a->m_length != b->m_length) |
| 181 if (a->m_length != b->m_length) | 167 return a->m_length < b->m_length; |
| 182 return a->m_length < b->m_length; | 168 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies; |
| 183 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies; | 169 } |
| 184 } | 170 |
| 185 | 171 static void printLiveStringStats(void*) { |
| 186 static void printLiveStringStats(void*) | 172 MutexLocker locker(statsMutex()); |
| 187 { | 173 HashSet<void*>& strings = liveStrings(); |
| 188 MutexLocker locker(statsMutex()); | 174 |
| 189 HashSet<void*>& strings = liveStrings(); | 175 HashMap<StringImpl*, RefPtr<PerStringStats>> stats; |
| 190 | 176 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); +
+iter) { |
| 191 HashMap<StringImpl*, RefPtr<PerStringStats>> stats; | 177 StringImpl* string = static_cast<StringImpl*>(*iter); |
| 192 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end();
++iter) { | 178 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry = stats.find(st
ring); |
| 193 StringImpl* string = static_cast<StringImpl*>(*iter); | 179 RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>
(PerStringStats::create()) : entry->value; |
| 194 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry = stats.fin
d(string); | 180 value->add(string); |
| 195 RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringSt
ats>(PerStringStats::create()) : entry->value; | 181 stats.set(string, value.release()); |
| 196 value->add(string); | 182 } |
| 197 stats.set(string, value.release()); | 183 |
| 198 } | 184 Vector<RefPtr<PerStringStats>> all; |
| 199 | 185 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter = stats.begin
(); iter != stats.end(); ++iter) |
| 200 Vector<RefPtr<PerStringStats>> all; | 186 all.append(iter->value); |
| 201 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter = stats.beg
in(); iter != stats.end(); ++iter) | 187 |
| 202 all.append(iter->value); | 188 std::sort(all.begin(), all.end()); |
| 203 | 189 std::reverse(all.begin(), all.end()); |
| 204 std::sort(all.begin(), all.end()); | 190 for (size_t i = 0; i < 20 && i < all.size(); ++i) |
| 205 std::reverse(all.begin(), all.end()); | 191 all[i]->print(); |
| 206 for (size_t i = 0; i < 20 && i < all.size(); ++i) | |
| 207 all[i]->print(); | |
| 208 } | 192 } |
| 209 | 193 |
| 210 StringStats StringImpl::m_stringStats; | 194 StringStats StringImpl::m_stringStats; |
| 211 | 195 |
| 212 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printString
StatsFrequency; | 196 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printString
StatsFrequency; |
| 213 | 197 |
| 214 void StringStats::removeString(StringImpl* string) | 198 void StringStats::removeString(StringImpl* string) { |
| 215 { | 199 unsigned length = string->length(); |
| 216 unsigned length = string->length(); | 200 --m_totalNumberStrings; |
| 217 --m_totalNumberStrings; | 201 |
| 218 | 202 if (string->is8Bit()) { |
| 219 if (string->is8Bit()) { | 203 --m_number8BitStrings; |
| 220 --m_number8BitStrings; | 204 m_total8BitData -= length; |
| 221 m_total8BitData -= length; | 205 } else { |
| 222 } else { | 206 --m_number16BitStrings; |
| 223 --m_number16BitStrings; | 207 m_total16BitData -= length; |
| 224 m_total16BitData -= length; | 208 } |
| 225 } | 209 |
| 226 | 210 if (!--s_stringRemovesTillPrintStats) { |
| 227 if (!--s_stringRemovesTillPrintStats) { | 211 s_stringRemovesTillPrintStats = s_printStringStatsFrequency; |
| 228 s_stringRemovesTillPrintStats = s_printStringStatsFrequency; | 212 printStats(); |
| 229 printStats(); | 213 } |
| 230 } | 214 } |
| 231 } | 215 |
| 232 | 216 void StringStats::printStats() { |
| 233 void StringStats::printStats() | 217 dataLogF("String stats for process id %d:\n", getpid()); |
| 234 { | 218 |
| 235 dataLogF("String stats for process id %d:\n", getpid()); | 219 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData; |
| 236 | 220 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100
) / (double)m_totalNumberStrings : 0.0; |
| 237 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitDat
a; | 221 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (do
uble)m_number8BitStrings : 0.0; |
| 238 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 1
00) / (double)m_totalNumberStrings : 0.0; | 222 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6
.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, aver
age8bitLength); |
| 239 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (
double)m_number8BitStrings : 0.0; | 223 |
| 240 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length
%6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, av
erage8bitLength); | 224 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 1
00) / (double)m_totalNumberStrings : 0.0; |
| 241 | 225 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData /
(double)m_number16BitStrings : 0.0; |
| 242 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings *
100) / (double)m_totalNumberStrings : 0.0; | 226 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6
.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData *
2, average16bitLength); |
| 243 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData
/ (double)m_number16BitStrings : 0.0; | 227 |
| 244 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length
%6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData
* 2, average16bitLength); | 228 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters /
(double)m_totalNumberStrings : 0.0; |
| 245 | 229 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2; |
| 246 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters
/ (double)m_totalNumberStrings : 0.0; | 230 dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.
1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLengt
h); |
| 247 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2; | 231 unsigned long long totalSavedBytes = m_total8BitData; |
| 248 dataLogF("%8u Total %12llu chars %12llu bytes avg length %
6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLen
gth); | 232 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (d
ouble)(totalDataBytes + totalSavedBytes) : 0.0; |
| 249 unsigned long long totalSavedBytes = m_total8BitData; | 233 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, p
ercentSavings); |
| 250 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) /
(double)(totalDataBytes + totalSavedBytes) : 0.0; | 234 |
| 251 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes,
percentSavings); | 235 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl); |
| 252 | 236 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100; |
| 253 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl); | 237 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, ove
rheadPercent); |
| 254 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 10
0; | 238 |
| 255 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, o
verheadPercent); | 239 internal::callOnMainThread(&printLiveStringStats, nullptr); |
| 256 | |
| 257 internal::callOnMainThread(&printLiveStringStats, nullptr); | |
| 258 } | 240 } |
| 259 #endif | 241 #endif |
| 260 | 242 |
| 261 void* StringImpl::operator new(size_t size) | 243 void* StringImpl::operator new(size_t size) { |
| 262 { | 244 ASSERT(size == sizeof(StringImpl)); |
| 263 ASSERT(size == sizeof(StringImpl)); | 245 return Partitions::bufferMalloc(size); |
| 264 return Partitions::bufferMalloc(size); | 246 } |
| 265 } | 247 |
| 266 | 248 void StringImpl::operator delete(void* ptr) { |
| 267 void StringImpl::operator delete(void* ptr) | 249 Partitions::bufferFree(ptr); |
| 268 { | 250 } |
| 269 Partitions::bufferFree(ptr); | 251 |
| 270 } | 252 inline StringImpl::~StringImpl() { |
| 271 | 253 ASSERT(!isStatic()); |
| 272 inline StringImpl::~StringImpl() | 254 |
| 273 { | 255 STRING_STATS_REMOVE_STRING(this); |
| 274 ASSERT(!isStatic()); | 256 |
| 275 | 257 if (isAtomic()) |
| 276 STRING_STATS_REMOVE_STRING(this); | 258 AtomicString::remove(this); |
| 277 | 259 } |
| 278 if (isAtomic()) | 260 |
| 279 AtomicString::remove(this); | 261 void StringImpl::destroyIfNotStatic() { |
| 280 } | 262 if (!isStatic()) |
| 281 | 263 delete this; |
| 282 void StringImpl::destroyIfNotStatic() | 264 } |
| 283 { | 265 |
| 284 if (!isStatic()) | 266 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*&
data) { |
| 285 delete this; | 267 if (!length) { |
| 286 } | 268 data = 0; |
| 287 | 269 return empty(); |
| 288 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*&
data) | 270 } |
| 289 { | 271 |
| 290 if (!length) { | 272 // Allocate a single buffer large enough to contain the StringImpl |
| 291 data = 0; | 273 // struct as well as the data which it contains. This removes one |
| 292 return empty(); | 274 // heap allocation from this call. |
| 293 } | 275 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(allocat
ionSize<LChar>(length))); |
| 294 | 276 |
| 295 // Allocate a single buffer large enough to contain the StringImpl | 277 data = reinterpret_cast<LChar*>(string + 1); |
| 296 // struct as well as the data which it contains. This removes one | 278 return adoptRef(new (string) StringImpl(length, Force8BitConstructor)); |
| 297 // heap allocation from this call. | 279 } |
| 298 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(alloc
ationSize<LChar>(length))); | 280 |
| 299 | 281 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*&
data) { |
| 300 data = reinterpret_cast<LChar*>(string + 1); | 282 if (!length) { |
| 301 return adoptRef(new (string) StringImpl(length, Force8BitConstructor)); | 283 data = 0; |
| 302 } | 284 return empty(); |
| 303 | 285 } |
| 304 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*&
data) | 286 |
| 305 { | 287 // Allocate a single buffer large enough to contain the StringImpl |
| 306 if (!length) { | 288 // struct as well as the data which it contains. This removes one |
| 307 data = 0; | 289 // heap allocation from this call. |
| 308 return empty(); | 290 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(allocat
ionSize<UChar>(length))); |
| 309 } | 291 |
| 310 | 292 data = reinterpret_cast<UChar*>(string + 1); |
| 311 // Allocate a single buffer large enough to contain the StringImpl | 293 return adoptRef(new (string) StringImpl(length)); |
| 312 // struct as well as the data which it contains. This removes one | 294 } |
| 313 // heap allocation from this call. | 295 |
| 314 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(alloc
ationSize<UChar>(length))); | 296 static StaticStringsTable& staticStrings() { |
| 315 | 297 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ()); |
| 316 data = reinterpret_cast<UChar*>(string + 1); | 298 return staticStrings; |
| 317 return adoptRef(new (string) StringImpl(length)); | |
| 318 } | |
| 319 | |
| 320 static StaticStringsTable& staticStrings() | |
| 321 { | |
| 322 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ()); | |
| 323 return staticStrings; | |
| 324 } | 299 } |
| 325 | 300 |
| 326 #if ENABLE(ASSERT) | 301 #if ENABLE(ASSERT) |
| 327 static bool s_allowCreationOfStaticStrings = true; | 302 static bool s_allowCreationOfStaticStrings = true; |
| 328 #endif | 303 #endif |
| 329 | 304 |
| 330 const StaticStringsTable& StringImpl::allStaticStrings() | 305 const StaticStringsTable& StringImpl::allStaticStrings() { |
| 331 { | 306 return staticStrings(); |
| 332 return staticStrings(); | 307 } |
| 333 } | 308 |
| 334 | 309 void StringImpl::freezeStaticStrings() { |
| 335 void StringImpl::freezeStaticStrings() | 310 ASSERT(isMainThread()); |
| 336 { | |
| 337 ASSERT(isMainThread()); | |
| 338 | 311 |
| 339 #if ENABLE(ASSERT) | 312 #if ENABLE(ASSERT) |
| 340 s_allowCreationOfStaticStrings = false; | 313 s_allowCreationOfStaticStrings = false; |
| 341 #endif | 314 #endif |
| 342 } | 315 } |
| 343 | 316 |
| 344 unsigned StringImpl::m_highestStaticStringLength = 0; | 317 unsigned StringImpl::m_highestStaticStringLength = 0; |
| 345 | 318 |
| 346 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsign
ed hash) | 319 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsign
ed hash) { |
| 347 { | 320 ASSERT(s_allowCreationOfStaticStrings); |
| 348 ASSERT(s_allowCreationOfStaticStrings); | 321 ASSERT(string); |
| 349 ASSERT(string); | 322 ASSERT(length); |
| 350 ASSERT(length); | 323 |
| 351 | 324 StaticStringsTable::const_iterator it = staticStrings().find(hash); |
| 352 StaticStringsTable::const_iterator it = staticStrings().find(hash); | 325 if (it != staticStrings().end()) { |
| 353 if (it != staticStrings().end()) { | 326 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar))); |
| 354 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar))); | 327 return it->value; |
| 355 return it->value; | 328 } |
| 356 } | 329 |
| 357 | 330 // Allocate a single buffer large enough to contain the StringImpl |
| 358 // Allocate a single buffer large enough to contain the StringImpl | 331 // struct as well as the data which it contains. This removes one |
| 359 // struct as well as the data which it contains. This removes one | 332 // heap allocation from this call. |
| 360 // heap allocation from this call. | 333 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Strin
gImpl)) / sizeof(LChar))); |
| 361 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str
ingImpl)) / sizeof(LChar))); | 334 size_t size = sizeof(StringImpl) + length * sizeof(LChar); |
| 362 size_t size = sizeof(StringImpl) + length * sizeof(LChar); | 335 |
| 363 | 336 WTF_ANNOTATE_SCOPED_MEMORY_LEAK; |
| 364 WTF_ANNOTATE_SCOPED_MEMORY_LEAK; | 337 StringImpl* impl = static_cast<StringImpl*>(Partitions::bufferMalloc(size)); |
| 365 StringImpl* impl = static_cast<StringImpl*>(Partitions::bufferMalloc(size)); | 338 |
| 366 | 339 LChar* data = reinterpret_cast<LChar*>(impl + 1); |
| 367 LChar* data = reinterpret_cast<LChar*>(impl + 1); | 340 impl = new (impl) StringImpl(length, hash, StaticString); |
| 368 impl = new (impl) StringImpl(length, hash, StaticString); | 341 memcpy(data, string, length * sizeof(LChar)); |
| 369 memcpy(data, string, length * sizeof(LChar)); | |
| 370 #if ENABLE(ASSERT) | 342 #if ENABLE(ASSERT) |
| 371 impl->assertHashIsCorrect(); | 343 impl->assertHashIsCorrect(); |
| 372 #endif | 344 #endif |
| 373 | 345 |
| 374 ASSERT(isMainThread()); | 346 ASSERT(isMainThread()); |
| 375 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length); | 347 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length); |
| 376 staticStrings().add(hash, impl); | 348 staticStrings().add(hash, impl); |
| 377 WTF_ANNOTATE_BENIGN_RACE(impl, | 349 WTF_ANNOTATE_BENIGN_RACE(impl, |
| 378 "Benign race on the reference counter of a static string created by Stri
ngImpl::createStatic"); | 350 "Benign race on the reference counter of a static str
ing created by StringImpl::createStatic"); |
| 379 | 351 |
| 380 return impl; | 352 return impl; |
| 381 } | 353 } |
| 382 | 354 |
| 383 void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) | 355 void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) { |
| 384 { | 356 ASSERT(s_allowCreationOfStaticStrings); |
| 385 ASSERT(s_allowCreationOfStaticStrings); | 357 staticStrings().reserveCapacityForSize(size); |
| 386 staticStrings().reserveCapacityForSize(size); | 358 } |
| 387 } | 359 |
| 388 | 360 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned leng
th) { |
| 389 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned leng
th) | 361 if (!characters || !length) |
| 390 { | 362 return empty(); |
| 391 if (!characters || !length) | 363 |
| 392 return empty(); | 364 UChar* data; |
| 393 | 365 RefPtr<StringImpl> string = createUninitialized(length, data); |
| 394 UChar* data; | 366 memcpy(data, characters, length * sizeof(UChar)); |
| 395 RefPtr<StringImpl> string = createUninitialized(length, data); | 367 return string.release(); |
| 396 memcpy(data, characters, length * sizeof(UChar)); | 368 } |
| 397 return string.release(); | 369 |
| 398 } | 370 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned leng
th) { |
| 399 | 371 if (!characters || !length) |
| 400 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned leng
th) | 372 return empty(); |
| 401 { | 373 |
| 402 if (!characters || !length) | 374 LChar* data; |
| 403 return empty(); | 375 RefPtr<StringImpl> string = createUninitialized(length, data); |
| 404 | 376 memcpy(data, characters, length * sizeof(LChar)); |
| 405 LChar* data; | 377 return string.release(); |
| 406 RefPtr<StringImpl> string = createUninitialized(length, data); | 378 } |
| 407 memcpy(data, characters, length * sizeof(LChar)); | 379 |
| 408 return string.release(); | 380 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters,
unsigned length) { |
| 409 } | 381 if (!characters || !length) |
| 410 | 382 return empty(); |
| 411 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters,
unsigned length) | 383 |
| 412 { | 384 LChar* data; |
| 413 if (!characters || !length) | 385 RefPtr<StringImpl> string = createUninitialized(length, data); |
| 414 return empty(); | 386 |
| 415 | 387 for (size_t i = 0; i < length; ++i) { |
| 416 LChar* data; | 388 if (characters[i] & 0xff00) |
| 417 RefPtr<StringImpl> string = createUninitialized(length, data); | 389 return create(characters, length); |
| 418 | 390 data[i] = static_cast<LChar>(characters[i]); |
| 419 for (size_t i = 0; i < length; ++i) { | 391 } |
| 420 if (characters[i] & 0xff00) | 392 |
| 421 return create(characters, length); | 393 return string.release(); |
| 422 data[i] = static_cast<LChar>(characters[i]); | 394 } |
| 423 } | 395 |
| 424 | 396 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) { |
| 425 return string.release(); | 397 if (!string) |
| 426 } | 398 return empty(); |
| 427 | 399 size_t length = strlen(reinterpret_cast<const char*>(string)); |
| 428 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) | 400 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); |
| 429 { | 401 return create(string, length); |
| 430 if (!string) | 402 } |
| 431 return empty(); | 403 |
| 432 size_t length = strlen(reinterpret_cast<const char*>(string)); | 404 bool StringImpl::containsOnlyWhitespace() { |
| 433 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); | 405 // FIXME: The definition of whitespace here includes a number of characters |
| 434 return create(string, length); | 406 // that are not whitespace from the point of view of LayoutText; I wonder if |
| 435 } | 407 // that's a problem in practice. |
| 436 | 408 if (is8Bit()) { |
| 437 bool StringImpl::containsOnlyWhitespace() | |
| 438 { | |
| 439 // FIXME: The definition of whitespace here includes a number of characters | |
| 440 // that are not whitespace from the point of view of LayoutText; I wonder if | |
| 441 // that's a problem in practice. | |
| 442 if (is8Bit()) { | |
| 443 for (unsigned i = 0; i < m_length; ++i) { | |
| 444 UChar c = characters8()[i]; | |
| 445 if (!isASCIISpace(c)) | |
| 446 return false; | |
| 447 } | |
| 448 | |
| 449 return true; | |
| 450 } | |
| 451 | |
| 452 for (unsigned i = 0; i < m_length; ++i) { | 409 for (unsigned i = 0; i < m_length; ++i) { |
| 453 UChar c = characters16()[i]; | 410 UChar c = characters8()[i]; |
| 454 if (!isASCIISpace(c)) | 411 if (!isASCIISpace(c)) |
| 455 return false; | 412 return false; |
| 456 } | 413 } |
| 414 |
| 457 return true; | 415 return true; |
| 458 } | 416 } |
| 459 | 417 |
| 460 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) | 418 for (unsigned i = 0; i < m_length; ++i) { |
| 461 { | 419 UChar c = characters16()[i]; |
| 462 if (start >= m_length) | 420 if (!isASCIISpace(c)) |
| 463 return empty(); | 421 return false; |
| 464 unsigned maxLength = m_length - start; | 422 } |
| 465 if (length >= maxLength) { | 423 return true; |
| 466 if (!start) | 424 } |
| 467 return this; | 425 |
| 468 length = maxLength; | 426 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) { |
| 469 } | 427 if (start >= m_length) |
| 470 if (is8Bit()) | 428 return empty(); |
| 471 return create(characters8() + start, length); | 429 unsigned maxLength = m_length - start; |
| 472 | 430 if (length >= maxLength) { |
| 473 return create(characters16() + start, length); | 431 if (!start) |
| 474 } | 432 return this; |
| 475 | 433 length = maxLength; |
| 476 UChar32 StringImpl::characterStartingAt(unsigned i) | 434 } |
| 477 { | 435 if (is8Bit()) |
| 478 if (is8Bit()) | 436 return create(characters8() + start, length); |
| 479 return characters8()[i]; | 437 |
| 480 if (U16_IS_SINGLE(characters16()[i])) | 438 return create(characters16() + start, length); |
| 481 return characters16()[i]; | 439 } |
| 482 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(chara
cters16()[i + 1])) | 440 |
| 483 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]); | 441 UChar32 StringImpl::characterStartingAt(unsigned i) { |
| 484 return 0; | 442 if (is8Bit()) |
| 485 } | 443 return characters8()[i]; |
| 486 | 444 if (U16_IS_SINGLE(characters16()[i])) |
| 487 PassRefPtr<StringImpl> StringImpl::lower() | 445 return characters16()[i]; |
| 488 { | 446 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(charact
ers16()[i + 1])) |
| 489 // Note: This is a hot function in the Dromaeo benchmark, specifically the | 447 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]); |
| 490 // no-op code path up through the first 'return' statement. | 448 return 0; |
| 491 | 449 } |
| 492 // First scan the string for uppercase and non-ASCII characters: | 450 |
| 493 if (is8Bit()) { | 451 PassRefPtr<StringImpl> StringImpl::lower() { |
| 494 unsigned firstIndexToBeLowered = m_length; | 452 // Note: This is a hot function in the Dromaeo benchmark, specifically the |
| 495 for (unsigned i = 0; i < m_length; ++i) { | 453 // no-op code path up through the first 'return' statement. |
| 496 LChar ch = characters8()[i]; | 454 |
| 497 if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) { | 455 // First scan the string for uppercase and non-ASCII characters: |
| 498 firstIndexToBeLowered = i; | 456 if (is8Bit()) { |
| 499 break; | 457 unsigned firstIndexToBeLowered = m_length; |
| 500 } | 458 for (unsigned i = 0; i < m_length; ++i) { |
| 501 } | 459 LChar ch = characters8()[i]; |
| 502 | 460 if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) { |
| 503 // Nothing to do if the string is all ASCII with no uppercase. | 461 firstIndexToBeLowered = i; |
| 504 if (firstIndexToBeLowered == m_length) | 462 break; |
| 505 return this; | 463 } |
| 506 | 464 } |
| 507 LChar* data8; | 465 |
| 508 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | |
| 509 memcpy(data8, characters8(), firstIndexToBeLowered); | |
| 510 | |
| 511 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) { | |
| 512 LChar ch = characters8()[i]; | |
| 513 data8[i] = UNLIKELY(ch & ~0x7F) | |
| 514 ? static_cast<LChar>(Unicode::toLower(ch)) : toASCIILower(ch); | |
| 515 } | |
| 516 | |
| 517 return newImpl.release(); | |
| 518 } | |
| 519 | |
| 520 bool noUpper = true; | |
| 521 UChar ored = 0; | |
| 522 | |
| 523 const UChar* end = characters16() + m_length; | |
| 524 for (const UChar* chp = characters16(); chp != end; ++chp) { | |
| 525 if (UNLIKELY(isASCIIUpper(*chp))) | |
| 526 noUpper = false; | |
| 527 ored |= *chp; | |
| 528 } | |
| 529 // Nothing to do if the string is all ASCII with no uppercase. | 466 // Nothing to do if the string is all ASCII with no uppercase. |
| 530 if (noUpper && !(ored & ~0x7F)) | 467 if (firstIndexToBeLowered == m_length) |
| 531 return this; | 468 return this; |
| 532 | 469 |
| 533 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma
x())); | 470 LChar* data8; |
| 534 int32_t length = m_length; | 471 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); |
| 535 | 472 memcpy(data8, characters8(), firstIndexToBeLowered); |
| 536 if (!(ored & ~0x7F)) { | 473 |
| 537 UChar* data16; | 474 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) { |
| 538 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | 475 LChar ch = characters8()[i]; |
| 539 | 476 data8[i] = UNLIKELY(ch & ~0x7F) |
| 540 for (int32_t i = 0; i < length; ++i) { | 477 ? static_cast<LChar>(Unicode::toLower(ch)) |
| 541 UChar c = characters16()[i]; | 478 : toASCIILower(ch); |
| 542 data16[i] = toASCIILower(c); | 479 } |
| 543 } | 480 |
| 544 return newImpl.release(); | 481 return newImpl.release(); |
| 545 } | 482 } |
| 546 | 483 |
| 547 // Do a slower implementation for cases that include non-ASCII characters. | 484 bool noUpper = true; |
| 485 UChar ored = 0; |
| 486 |
| 487 const UChar* end = characters16() + m_length; |
| 488 for (const UChar* chp = characters16(); chp != end; ++chp) { |
| 489 if (UNLIKELY(isASCIIUpper(*chp))) |
| 490 noUpper = false; |
| 491 ored |= *chp; |
| 492 } |
| 493 // Nothing to do if the string is all ASCII with no uppercase. |
| 494 if (noUpper && !(ored & ~0x7F)) |
| 495 return this; |
| 496 |
| 497 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max(
))); |
| 498 int32_t length = m_length; |
| 499 |
| 500 if (!(ored & ~0x7F)) { |
| 548 UChar* data16; | 501 UChar* data16; |
| 549 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | 502 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); |
| 550 | 503 |
| 551 bool error; | 504 for (int32_t i = 0; i < length; ++i) { |
| 552 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_leng
th, &error); | 505 UChar c = characters16()[i]; |
| 553 if (!error && realLength == length) | 506 data16[i] = toASCIILower(c); |
| 554 return newImpl.release(); | 507 } |
| 555 | |
| 556 newImpl = createUninitialized(realLength, data16); | |
| 557 Unicode::toLower(data16, realLength, characters16(), m_length, &error); | |
| 558 if (error) | |
| 559 return this; | |
| 560 return newImpl.release(); | 508 return newImpl.release(); |
| 561 } | 509 } |
| 562 | 510 |
| 563 PassRefPtr<StringImpl> StringImpl::upper() | 511 // Do a slower implementation for cases that include non-ASCII characters. |
| 564 { | 512 UChar* data16; |
| 565 // This function could be optimized for no-op cases the way lower() is, | 513 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); |
| 566 // but in empirical testing, few actual calls to upper() are no-ops, so | 514 |
| 567 // it wouldn't be worth the extra time for pre-scanning. | 515 bool error; |
| 568 | 516 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length
, &error); |
| 569 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma
x())); | 517 if (!error && realLength == length) |
| 570 int32_t length = m_length; | 518 return newImpl.release(); |
| 571 | 519 |
| 572 if (is8Bit()) { | 520 newImpl = createUninitialized(realLength, data16); |
| 573 LChar* data8; | 521 Unicode::toLower(data16, realLength, characters16(), m_length, &error); |
| 574 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | 522 if (error) |
| 575 | 523 return this; |
| 576 // Do a faster loop for the case where all the characters are ASCII. | 524 return newImpl.release(); |
| 577 LChar ored = 0; | 525 } |
| 578 for (int i = 0; i < length; ++i) { | 526 |
| 579 LChar c = characters8()[i]; | 527 PassRefPtr<StringImpl> StringImpl::upper() { |
| 580 ored |= c; | 528 // This function could be optimized for no-op cases the way lower() is, |
| 581 data8[i] = toASCIIUpper(c); | 529 // but in empirical testing, few actual calls to upper() are no-ops, so |
| 582 } | 530 // it wouldn't be worth the extra time for pre-scanning. |
| 583 if (!(ored & ~0x7F)) | 531 |
| 584 return newImpl.release(); | 532 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max(
))); |
| 585 | 533 int32_t length = m_length; |
| 586 // Do a slower implementation for cases that include non-ASCII Latin-1 c
haracters. | 534 |
| 587 int numberSharpSCharacters = 0; | 535 if (is8Bit()) { |
| 588 | 536 LChar* data8; |
| 589 // There are two special cases. | 537 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); |
| 590 // 1. latin-1 characters when converted to upper case are 16 bit charac
ters. | 538 |
| 591 // 2. Lower case sharp-S converts to "SS" (two characters) | 539 // Do a faster loop for the case where all the characters are ASCII. |
| 592 for (int32_t i = 0; i < length; ++i) { | 540 LChar ored = 0; |
| 593 LChar c = characters8()[i]; | 541 for (int i = 0; i < length; ++i) { |
| 594 if (UNLIKELY(c == smallLetterSharpSCharacter)) | 542 LChar c = characters8()[i]; |
| 595 ++numberSharpSCharacters; | 543 ored |= c; |
| 596 UChar upper = static_cast<UChar>(Unicode::toUpper(c)); | 544 data8[i] = toASCIIUpper(c); |
| 597 if (UNLIKELY(upper > 0xff)) { | 545 } |
| 598 // Since this upper-cased character does not fit in an 8-bit str
ing, we need to take the 16-bit path. | 546 if (!(ored & ~0x7F)) |
| 599 goto upconvert; | 547 return newImpl.release(); |
| 600 } | 548 |
| 601 data8[i] = static_cast<LChar>(upper); | 549 // Do a slower implementation for cases that include non-ASCII Latin-1 chara
cters. |
| 602 } | 550 int numberSharpSCharacters = 0; |
| 603 | 551 |
| 604 if (!numberSharpSCharacters) | 552 // There are two special cases. |
| 605 return newImpl.release(); | 553 // 1. latin-1 characters when converted to upper case are 16 bit characters
. |
| 606 | 554 // 2. Lower case sharp-S converts to "SS" (two characters) |
| 607 // We have numberSSCharacters sharp-s characters, but none of the other
special characters. | 555 for (int32_t i = 0; i < length; ++i) { |
| 608 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); | 556 LChar c = characters8()[i]; |
| 609 | 557 if (UNLIKELY(c == smallLetterSharpSCharacter)) |
| 610 LChar* dest = data8; | 558 ++numberSharpSCharacters; |
| 611 | 559 UChar upper = static_cast<UChar>(Unicode::toUpper(c)); |
| 612 for (int32_t i = 0; i < length; ++i) { | 560 if (UNLIKELY(upper > 0xff)) { |
| 613 LChar c = characters8()[i]; | 561 // Since this upper-cased character does not fit in an 8-bit string, we
need to take the 16-bit path. |
| 614 if (c == smallLetterSharpSCharacter) { | 562 goto upconvert; |
| 615 *dest++ = 'S'; | 563 } |
| 616 *dest++ = 'S'; | 564 data8[i] = static_cast<LChar>(upper); |
| 617 } else { | 565 } |
| 618 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); | 566 |
| 619 } | 567 if (!numberSharpSCharacters) |
| 620 } | 568 return newImpl.release(); |
| 621 | 569 |
| 622 return newImpl.release(); | 570 // We have numberSSCharacters sharp-s characters, but none of the other spec
ial characters. |
| 623 } | 571 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); |
| 572 |
| 573 LChar* dest = data8; |
| 574 |
| 575 for (int32_t i = 0; i < length; ++i) { |
| 576 LChar c = characters8()[i]; |
| 577 if (c == smallLetterSharpSCharacter) { |
| 578 *dest++ = 'S'; |
| 579 *dest++ = 'S'; |
| 580 } else { |
| 581 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); |
| 582 } |
| 583 } |
| 584 |
| 585 return newImpl.release(); |
| 586 } |
| 624 | 587 |
| 625 upconvert: | 588 upconvert: |
| 626 RefPtr<StringImpl> upconverted = upconvertedString(); | 589 RefPtr<StringImpl> upconverted = upconvertedString(); |
| 627 const UChar* source16 = upconverted->characters16(); | 590 const UChar* source16 = upconverted->characters16(); |
| 628 | 591 |
| 629 UChar* data16; | 592 UChar* data16; |
| 630 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | 593 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); |
| 631 | 594 |
| 632 // Do a faster loop for the case where all the characters are ASCII. | 595 // Do a faster loop for the case where all the characters are ASCII. |
| 633 UChar ored = 0; | 596 UChar ored = 0; |
| 634 for (int i = 0; i < length; ++i) { | 597 for (int i = 0; i < length; ++i) { |
| 635 UChar c = source16[i]; | 598 UChar c = source16[i]; |
| 636 ored |= c; | 599 ored |= c; |
| 637 data16[i] = toASCIIUpper(c); | 600 data16[i] = toASCIIUpper(c); |
| 638 } | 601 } |
| 639 if (!(ored & ~0x7F)) | 602 if (!(ored & ~0x7F)) |
| 640 return newImpl.release(); | |
| 641 | |
| 642 // Do a slower implementation for cases that include non-ASCII characters. | |
| 643 bool error; | |
| 644 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &e
rror); | |
| 645 if (!error && realLength == length) | |
| 646 return newImpl; | |
| 647 newImpl = createUninitialized(realLength, data16); | |
| 648 Unicode::toUpper(data16, realLength, source16, m_length, &error); | |
| 649 if (error) | |
| 650 return this; | |
| 651 return newImpl.release(); | 603 return newImpl.release(); |
| 652 } | 604 |
| 653 | 605 // Do a slower implementation for cases that include non-ASCII characters. |
| 654 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char*
lang) | 606 bool error; |
| 655 { | 607 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &err
or); |
| 656 if (equalIgnoringCase(localeId, lang)) | 608 if (!error && realLength == length) |
| 657 return true; | 609 return newImpl; |
| 658 static char localeIdPrefix[4]; | 610 newImpl = createUninitialized(realLength, data16); |
| 659 static const char delimeter[4] = "-_@"; | 611 Unicode::toUpper(data16, realLength, source16, m_length, &error); |
| 660 | 612 if (error) |
| 661 size_t langLength = strlen(lang); | 613 return this; |
| 662 RELEASE_ASSERT(langLength >= 2 && langLength <= 3); | 614 return newImpl.release(); |
| 663 strncpy(localeIdPrefix, lang, langLength); | 615 } |
| 664 for (int i = 0; i < 3; ++i) { | 616 |
| 665 localeIdPrefix[langLength] = delimeter[i]; | 617 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char*
lang) { |
| 666 // case-insensitive comparison | 618 if (equalIgnoringCase(localeId, lang)) |
| 667 if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langL
ength + 1, TextCaseInsensitive)) | 619 return true; |
| 668 return true; | 620 static char localeIdPrefix[4]; |
| 669 } | 621 static const char delimeter[4] = "-_@"; |
| 670 return false; | 622 |
| 623 size_t langLength = strlen(lang); |
| 624 RELEASE_ASSERT(langLength >= 2 && langLength <= 3); |
| 625 strncpy(localeIdPrefix, lang, langLength); |
| 626 for (int i = 0; i < 3; ++i) { |
| 627 localeIdPrefix[langLength] = delimeter[i]; |
| 628 // case-insensitive comparison |
| 629 if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langLengt
h + 1, TextCaseInsensitive)) |
| 630 return true; |
| 631 } |
| 632 return false; |
| 671 } | 633 } |
| 672 | 634 |
| 673 typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, cons
t char*, UErrorCode*); | 635 typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, cons
t char*, UErrorCode*); |
| 674 | 636 |
| 675 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length,
icuCaseConverter converter, const char* locale, StringImpl* originalString) | 637 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length,
icuCaseConverter converter, const char* locale, StringImpl* originalString) { |
| 676 { | 638 UChar* data16; |
| 677 UChar* data16; | 639 size_t targetLength = length; |
| 678 size_t targetLength = length; | 640 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16); |
| 679 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16); | 641 do { |
| 680 do { | |
| 681 UErrorCode status = U_ZERO_ERROR; | |
| 682 targetLength = converter(data16, targetLength, source16, length, locale,
&status); | |
| 683 if (U_SUCCESS(status)) { | |
| 684 if (length > 0) | |
| 685 return output->substring(0, targetLength); | |
| 686 return output.release(); | |
| 687 } | |
| 688 if (status != U_BUFFER_OVERFLOW_ERROR) | |
| 689 return originalString; | |
| 690 // Expand the buffer. | |
| 691 output = StringImpl::createUninitialized(targetLength, data16); | |
| 692 } while (true); | |
| 693 } | |
| 694 | |
| 695 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) | |
| 696 { | |
| 697 // Use the more-optimized code path most of the time. | |
| 698 // Only Turkic (tr and az) languages and Lithuanian requires | |
| 699 // locale-specific lowercasing rules. Even though CLDR has el-Lower, | |
| 700 // it's identical to the locale-agnostic lowercasing. Context-dependent | |
| 701 // handling of Greek capital sigma is built into the common lowercasing | |
| 702 // function in ICU. | |
| 703 const char* localeForConversion = 0; | |
| 704 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(local
eIdentifier, "az")) | |
| 705 localeForConversion = "tr"; | |
| 706 else if (localeIdMatchesLang(localeIdentifier, "lt")) | |
| 707 localeForConversion = "lt"; | |
| 708 else | |
| 709 return lower(); | |
| 710 | |
| 711 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) | |
| 712 CRASH(); | |
| 713 int length = m_length; | |
| 714 | |
| 715 RefPtr<StringImpl> upconverted = upconvertedString(); | |
| 716 const UChar* source16 = upconverted->characters16(); | |
| 717 return caseConvert(source16, length, u_strToLower, localeForConversion, this
); | |
| 718 } | |
| 719 | |
| 720 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) | |
| 721 { | |
| 722 // Use the more-optimized code path most of the time. | |
| 723 // Only Turkic (tr and az) languages and Greek require locale-specific | |
| 724 // lowercasing rules. | |
| 725 icu::UnicodeString transliteratorId; | |
| 726 const char* localeForConversion = 0; | |
| 727 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(local
eIdentifier, "az")) | |
| 728 localeForConversion = "tr"; | |
| 729 else if (localeIdMatchesLang(localeIdentifier, "el")) | |
| 730 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper"); | |
| 731 else if (localeIdMatchesLang(localeIdentifier, "lt")) | |
| 732 localeForConversion = "lt"; | |
| 733 else | |
| 734 return upper(); | |
| 735 | |
| 736 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) | |
| 737 CRASH(); | |
| 738 int length = m_length; | |
| 739 | |
| 740 RefPtr<StringImpl> upconverted = upconvertedString(); | |
| 741 const UChar* source16 = upconverted->characters16(); | |
| 742 | |
| 743 if (localeForConversion) | |
| 744 return caseConvert(source16, length, u_strToUpper, localeForConversion,
this); | |
| 745 | |
| 746 // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek
. | |
| 747 UErrorCode status = U_ZERO_ERROR; | 642 UErrorCode status = U_ZERO_ERROR; |
| 748 OwnPtr<icu::Transliterator> translit = | 643 targetLength = converter(data16, targetLength, source16, length, locale, &st
atus); |
| 749 adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FO
RWARD, status)); | 644 if (U_SUCCESS(status)) { |
| 750 if (U_FAILURE(status)) | 645 if (length > 0) |
| 751 return upper(); | 646 return output->substring(0, targetLength); |
| 752 | 647 return output.release(); |
| 753 // target will be copy-on-write. | 648 } |
| 754 icu::UnicodeString target(false, source16, length); | 649 if (status != U_BUFFER_OVERFLOW_ERROR) |
| 755 translit->transliterate(target); | 650 return originalString; |
| 756 | 651 // Expand the buffer. |
| 757 return create(target.getBuffer(), target.length()); | 652 output = StringImpl::createUninitialized(targetLength, data16); |
| 758 } | 653 } while (true); |
| 759 | 654 } |
| 760 PassRefPtr<StringImpl> StringImpl::fill(UChar character) | 655 |
| 761 { | 656 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) { |
| 762 if (!(character & ~0x7F)) { | 657 // Use the more-optimized code path most of the time. |
| 763 LChar* data; | 658 // Only Turkic (tr and az) languages and Lithuanian requires |
| 764 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | 659 // locale-specific lowercasing rules. Even though CLDR has el-Lower, |
| 765 for (unsigned i = 0; i < m_length; ++i) | 660 // it's identical to the locale-agnostic lowercasing. Context-dependent |
| 766 data[i] = static_cast<LChar>(character); | 661 // handling of Greek capital sigma is built into the common lowercasing |
| 767 return newImpl.release(); | 662 // function in ICU. |
| 768 } | 663 const char* localeForConversion = 0; |
| 769 UChar* data; | 664 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeI
dentifier, "az")) |
| 665 localeForConversion = "tr"; |
| 666 else if (localeIdMatchesLang(localeIdentifier, "lt")) |
| 667 localeForConversion = "lt"; |
| 668 else |
| 669 return lower(); |
| 670 |
| 671 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) |
| 672 CRASH(); |
| 673 int length = m_length; |
| 674 |
| 675 RefPtr<StringImpl> upconverted = upconvertedString(); |
| 676 const UChar* source16 = upconverted->characters16(); |
| 677 return caseConvert(source16, length, u_strToLower, localeForConversion, this); |
| 678 } |
| 679 |
| 680 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) { |
| 681 // Use the more-optimized code path most of the time. |
| 682 // Only Turkic (tr and az) languages and Greek require locale-specific |
| 683 // lowercasing rules. |
| 684 icu::UnicodeString transliteratorId; |
| 685 const char* localeForConversion = 0; |
| 686 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeI
dentifier, "az")) |
| 687 localeForConversion = "tr"; |
| 688 else if (localeIdMatchesLang(localeIdentifier, "el")) |
| 689 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper"); |
| 690 else if (localeIdMatchesLang(localeIdentifier, "lt")) |
| 691 localeForConversion = "lt"; |
| 692 else |
| 693 return upper(); |
| 694 |
| 695 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) |
| 696 CRASH(); |
| 697 int length = m_length; |
| 698 |
| 699 RefPtr<StringImpl> upconverted = upconvertedString(); |
| 700 const UChar* source16 = upconverted->characters16(); |
| 701 |
| 702 if (localeForConversion) |
| 703 return caseConvert(source16, length, u_strToUpper, localeForConversion, this
); |
| 704 |
| 705 // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek. |
| 706 UErrorCode status = U_ZERO_ERROR; |
| 707 OwnPtr<icu::Transliterator> translit = |
| 708 adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FORW
ARD, status)); |
| 709 if (U_FAILURE(status)) |
| 710 return upper(); |
| 711 |
| 712 // target will be copy-on-write. |
| 713 icu::UnicodeString target(false, source16, length); |
| 714 translit->transliterate(target); |
| 715 |
| 716 return create(target.getBuffer(), target.length()); |
| 717 } |
| 718 |
| 719 PassRefPtr<StringImpl> StringImpl::fill(UChar character) { |
| 720 if (!(character & ~0x7F)) { |
| 721 LChar* data; |
| 770 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | 722 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 771 for (unsigned i = 0; i < m_length; ++i) | 723 for (unsigned i = 0; i < m_length; ++i) |
| 772 data[i] = character; | 724 data[i] = static_cast<LChar>(character); |
| 773 return newImpl.release(); | 725 return newImpl.release(); |
| 774 } | 726 } |
| 775 | 727 UChar* data; |
| 776 PassRefPtr<StringImpl> StringImpl::foldCase() | 728 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 777 { | 729 for (unsigned i = 0; i < m_length; ++i) |
| 778 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma
x())); | 730 data[i] = character; |
| 779 int32_t length = m_length; | 731 return newImpl.release(); |
| 780 | 732 } |
| 781 if (is8Bit()) { | 733 |
| 782 // Do a faster loop for the case where all the characters are ASCII. | 734 PassRefPtr<StringImpl> StringImpl::foldCase() { |
| 783 LChar* data; | 735 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max(
))); |
| 784 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data); | 736 int32_t length = m_length; |
| 785 LChar ored = 0; | 737 |
| 786 | 738 if (is8Bit()) { |
| 787 for (int32_t i = 0; i < length; ++i) { | |
| 788 LChar c = characters8()[i]; | |
| 789 data[i] = toASCIILower(c); | |
| 790 ored |= c; | |
| 791 } | |
| 792 | |
| 793 if (!(ored & ~0x7F)) | |
| 794 return newImpl.release(); | |
| 795 | |
| 796 // Do a slower implementation for cases that include non-ASCII Latin-1 c
haracters. | |
| 797 for (int32_t i = 0; i < length; ++i) | |
| 798 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); | |
| 799 | |
| 800 return newImpl.release(); | |
| 801 } | |
| 802 | |
| 803 // Do a faster loop for the case where all the characters are ASCII. | 739 // Do a faster loop for the case where all the characters are ASCII. |
| 804 UChar* data; | 740 LChar* data; |
| 805 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | 741 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 806 UChar ored = 0; | 742 LChar ored = 0; |
| 743 |
| 807 for (int32_t i = 0; i < length; ++i) { | 744 for (int32_t i = 0; i < length; ++i) { |
| 808 UChar c = characters16()[i]; | 745 LChar c = characters8()[i]; |
| 809 ored |= c; | 746 data[i] = toASCIILower(c); |
| 810 data[i] = toASCIILower(c); | 747 ored |= c; |
| 811 } | 748 } |
| 749 |
| 812 if (!(ored & ~0x7F)) | 750 if (!(ored & ~0x7F)) |
| 813 return newImpl.release(); | 751 return newImpl.release(); |
| 814 | 752 |
| 815 // Do a slower implementation for cases that include non-ASCII characters. | 753 // Do a slower implementation for cases that include non-ASCII Latin-1 chara
cters. |
| 816 bool error; | 754 for (int32_t i = 0; i < length; ++i) |
| 817 int32_t realLength = Unicode::foldCase(data, length, characters16(), m_lengt
h, &error); | 755 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); |
| 818 if (!error && realLength == length) | 756 |
| 819 return newImpl.release(); | |
| 820 newImpl = createUninitialized(realLength, data); | |
| 821 Unicode::foldCase(data, realLength, characters16(), m_length, &error); | |
| 822 if (error) | |
| 823 return this; | |
| 824 return newImpl.release(); | 757 return newImpl.release(); |
| 758 } |
| 759 |
| 760 // Do a faster loop for the case where all the characters are ASCII. |
| 761 UChar* data; |
| 762 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 763 UChar ored = 0; |
| 764 for (int32_t i = 0; i < length; ++i) { |
| 765 UChar c = characters16()[i]; |
| 766 ored |= c; |
| 767 data[i] = toASCIILower(c); |
| 768 } |
| 769 if (!(ored & ~0x7F)) |
| 770 return newImpl.release(); |
| 771 |
| 772 // Do a slower implementation for cases that include non-ASCII characters. |
| 773 bool error; |
| 774 int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length,
&error); |
| 775 if (!error && realLength == length) |
| 776 return newImpl.release(); |
| 777 newImpl = createUninitialized(realLength, data); |
| 778 Unicode::foldCase(data, realLength, characters16(), m_length, &error); |
| 779 if (error) |
| 780 return this; |
| 781 return newImpl.release(); |
| 825 } | 782 } |
| 826 | 783 |
| 827 template <class UCharPredicate> | 784 template <class UCharPredicate> |
| 828 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate
predicate) | 785 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate
predicate) { |
| 829 { | 786 if (!m_length) |
| 830 if (!m_length) | 787 return empty(); |
| 831 return empty(); | 788 |
| 832 | 789 unsigned start = 0; |
| 833 unsigned start = 0; | 790 unsigned end = m_length - 1; |
| 834 unsigned end = m_length - 1; | 791 |
| 835 | 792 // skip white space from start |
| 836 // skip white space from start | 793 while (start <= end && predicate(is8Bit() ? characters8()[start] : characters1
6()[start])) |
| 837 while (start <= end && predicate(is8Bit() ? characters8()[start] : character
s16()[start])) | 794 ++start; |
| 838 ++start; | 795 |
| 839 | 796 // only white space |
| 840 // only white space | 797 if (start > end) |
| 841 if (start > end) | 798 return empty(); |
| 842 return empty(); | 799 |
| 843 | 800 // skip white space from end |
| 844 // skip white space from end | 801 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end])) |
| 845 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end])
) | 802 --end; |
| 846 --end; | 803 |
| 847 | 804 if (!start && end == m_length - 1) |
| 848 if (!start && end == m_length - 1) | 805 return this; |
| 849 return this; | 806 if (is8Bit()) |
| 850 if (is8Bit()) | 807 return create(characters8() + start, end + 1 - start); |
| 851 return create(characters8() + start, end + 1 - start); | 808 return create(characters16() + start, end + 1 - start); |
| 852 return create(characters16() + start, end + 1 - start); | |
| 853 } | 809 } |
| 854 | 810 |
| 855 class UCharPredicate { | 811 class UCharPredicate { |
| 856 public: | 812 public: |
| 857 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(functi
on) { } | 813 inline UCharPredicate(CharacterMatchFunctionPtr function) |
| 858 | 814 : m_function(function) {} |
| 859 inline bool operator()(UChar ch) const | 815 |
| 860 { | 816 inline bool operator()(UChar ch) const { |
| 861 return m_function(ch); | 817 return m_function(ch); |
| 862 } | 818 } |
| 863 | 819 |
| 864 private: | 820 private: |
| 865 const CharacterMatchFunctionPtr m_function; | 821 const CharacterMatchFunctionPtr m_function; |
| 866 }; | 822 }; |
| 867 | 823 |
| 868 class SpaceOrNewlinePredicate { | 824 class SpaceOrNewlinePredicate { |
| 869 public: | 825 public: |
| 870 inline bool operator()(UChar ch) const | 826 inline bool operator()(UChar ch) const { |
| 871 { | 827 return isSpaceOrNewline(ch); |
| 872 return isSpaceOrNewline(ch); | 828 } |
| 873 } | |
| 874 }; | 829 }; |
| 875 | 830 |
| 876 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() | 831 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() { |
| 877 { | 832 return stripMatchedCharacters(SpaceOrNewlinePredicate()); |
| 878 return stripMatchedCharacters(SpaceOrNewlinePredicate()); | 833 } |
| 879 } | 834 |
| 880 | 835 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhi
teSpace) { |
| 881 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhi
teSpace) | 836 return stripMatchedCharacters(UCharPredicate(isWhiteSpace)); |
| 882 { | |
| 883 return stripMatchedCharacters(UCharPredicate(isWhiteSpace)); | |
| 884 } | 837 } |
| 885 | 838 |
| 886 template <typename CharType> | 839 template <typename CharType> |
| 887 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType
* characters, CharacterMatchFunctionPtr findMatch) | 840 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType
* characters, CharacterMatchFunctionPtr findMatch) { |
| 888 { | 841 const CharType* from = characters; |
| 889 const CharType* from = characters; | 842 const CharType* fromend = from + m_length; |
| 890 const CharType* fromend = from + m_length; | 843 |
| 891 | 844 // Assume the common case will not remove any characters |
| 892 // Assume the common case will not remove any characters | 845 while (from != fromend && !findMatch(*from)) |
| 846 ++from; |
| 847 if (from == fromend) |
| 848 return this; |
| 849 |
| 850 StringBuffer<CharType> data(m_length); |
| 851 CharType* to = data.characters(); |
| 852 unsigned outc = from - characters; |
| 853 |
| 854 if (outc) |
| 855 memcpy(to, characters, outc * sizeof(CharType)); |
| 856 |
| 857 while (true) { |
| 858 while (from != fromend && findMatch(*from)) |
| 859 ++from; |
| 893 while (from != fromend && !findMatch(*from)) | 860 while (from != fromend && !findMatch(*from)) |
| 861 to[outc++] = *from++; |
| 862 if (from == fromend) |
| 863 break; |
| 864 } |
| 865 |
| 866 data.shrink(outc); |
| 867 |
| 868 return data.release(); |
| 869 } |
| 870 |
| 871 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr fi
ndMatch) { |
| 872 if (is8Bit()) |
| 873 return removeCharacters(characters8(), findMatch); |
| 874 return removeCharacters(characters16(), findMatch); |
| 875 } |
| 876 |
| 877 template <typename CharType, class UCharPredicate> |
| 878 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UChar
Predicate predicate, StripBehavior stripBehavior) { |
| 879 StringBuffer<CharType> data(m_length); |
| 880 |
| 881 const CharType* from = getCharacters<CharType>(); |
| 882 const CharType* fromend = from + m_length; |
| 883 int outc = 0; |
| 884 bool changedToSpace = false; |
| 885 |
| 886 CharType* to = data.characters(); |
| 887 |
| 888 if (stripBehavior == StripExtraWhiteSpace) { |
| 889 while (true) { |
| 890 while (from != fromend && predicate(*from)) { |
| 891 if (*from != ' ') |
| 892 changedToSpace = true; |
| 894 ++from; | 893 ++from; |
| 895 if (from == fromend) | 894 } |
| 896 return this; | 895 while (from != fromend && !predicate(*from)) |
| 897 | 896 to[outc++] = *from++; |
| 898 StringBuffer<CharType> data(m_length); | 897 if (from != fromend) |
| 899 CharType* to = data.characters(); | 898 to[outc++] = ' '; |
| 900 unsigned outc = from - characters; | 899 else |
| 901 | 900 break; |
| 902 if (outc) | 901 } |
| 903 memcpy(to, characters, outc * sizeof(CharType)); | 902 |
| 904 | 903 if (outc > 0 && to[outc - 1] == ' ') |
| 905 while (true) { | 904 --outc; |
| 906 while (from != fromend && findMatch(*from)) | 905 } else { |
| 907 ++from; | 906 for (; from != fromend; ++from) { |
| 908 while (from != fromend && !findMatch(*from)) | 907 if (predicate(*from)) { |
| 909 to[outc++] = *from++; | 908 if (*from != ' ') |
| 910 if (from == fromend) | 909 changedToSpace = true; |
| 911 break; | 910 to[outc++] = ' '; |
| 912 } | 911 } else { |
| 913 | 912 to[outc++] = *from; |
| 914 data.shrink(outc); | 913 } |
| 915 | 914 } |
| 916 return data.release(); | 915 } |
| 917 } | 916 |
| 918 | 917 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) |
| 919 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr fi
ndMatch) | 918 return this; |
| 920 { | 919 |
| 921 if (is8Bit()) | 920 data.shrink(outc); |
| 922 return removeCharacters(characters8(), findMatch); | 921 |
| 923 return removeCharacters(characters16(), findMatch); | 922 return data.release(); |
| 924 } | 923 } |
| 925 | 924 |
| 926 template <typename CharType, class UCharPredicate> | 925 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavio
r) { |
| 927 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UChar
Predicate predicate, StripBehavior stripBehavior) | 926 if (is8Bit()) |
| 928 { | 927 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePre
dicate(), stripBehavior); |
| 929 StringBuffer<CharType> data(m_length); | 928 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredi
cate(), stripBehavior); |
| 930 | 929 } |
| 931 const CharType* from = getCharacters<CharType>(); | 930 |
| 932 const CharType* fromend = from + m_length; | 931 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr is
WhiteSpace, StripBehavior stripBehavior) { |
| 933 int outc = 0; | 932 if (is8Bit()) |
| 934 bool changedToSpace = false; | 933 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(is
WhiteSpace), stripBehavior); |
| 935 | 934 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWh
iteSpace), stripBehavior); |
| 936 CharType* to = data.characters(); | 935 } |
| 937 | 936 |
| 938 if (stripBehavior == StripExtraWhiteSpace) { | 937 int StringImpl::toIntStrict(bool* ok, int base) { |
| 939 while (true) { | 938 if (is8Bit()) |
| 940 while (from != fromend && predicate(*from)) { | 939 return charactersToIntStrict(characters8(), m_length, ok, base); |
| 941 if (*from != ' ') | 940 return charactersToIntStrict(characters16(), m_length, ok, base); |
| 942 changedToSpace = true; | 941 } |
| 943 ++from; | 942 |
| 944 } | 943 unsigned StringImpl::toUIntStrict(bool* ok, int base) { |
| 945 while (from != fromend && !predicate(*from)) | 944 if (is8Bit()) |
| 946 to[outc++] = *from++; | 945 return charactersToUIntStrict(characters8(), m_length, ok, base); |
| 947 if (from != fromend) | 946 return charactersToUIntStrict(characters16(), m_length, ok, base); |
| 948 to[outc++] = ' '; | 947 } |
| 949 else | 948 |
| 950 break; | 949 int64_t StringImpl::toInt64Strict(bool* ok, int base) { |
| 951 } | 950 if (is8Bit()) |
| 952 | 951 return charactersToInt64Strict(characters8(), m_length, ok, base); |
| 953 if (outc > 0 && to[outc - 1] == ' ') | 952 return charactersToInt64Strict(characters16(), m_length, ok, base); |
| 954 --outc; | 953 } |
| 955 } else { | 954 |
| 956 for (; from != fromend; ++from) { | 955 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) { |
| 957 if (predicate(*from)) { | 956 if (is8Bit()) |
| 958 if (*from != ' ') | 957 return charactersToUInt64Strict(characters8(), m_length, ok, base); |
| 959 changedToSpace = true; | 958 return charactersToUInt64Strict(characters16(), m_length, ok, base); |
| 960 to[outc++] = ' '; | 959 } |
| 961 } else { | 960 |
| 962 to[outc++] = *from; | 961 int StringImpl::toInt(bool* ok) { |
| 963 } | 962 if (is8Bit()) |
| 964 } | 963 return charactersToInt(characters8(), m_length, ok); |
| 965 } | 964 return charactersToInt(characters16(), m_length, ok); |
| 966 | 965 } |
| 967 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) | 966 |
| 968 return this; | 967 unsigned StringImpl::toUInt(bool* ok) { |
| 969 | 968 if (is8Bit()) |
| 970 data.shrink(outc); | 969 return charactersToUInt(characters8(), m_length, ok); |
| 971 | 970 return charactersToUInt(characters16(), m_length, ok); |
| 972 return data.release(); | 971 } |
| 973 } | 972 |
| 974 | 973 int64_t StringImpl::toInt64(bool* ok) { |
| 975 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavio
r) | 974 if (is8Bit()) |
| 976 { | 975 return charactersToInt64(characters8(), m_length, ok); |
| 977 if (is8Bit()) | 976 return charactersToInt64(characters16(), m_length, ok); |
| 978 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlin
ePredicate(), stripBehavior); | 977 } |
| 979 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePre
dicate(), stripBehavior); | 978 |
| 980 } | 979 uint64_t StringImpl::toUInt64(bool* ok) { |
| 981 | 980 if (is8Bit()) |
| 982 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr is
WhiteSpace, StripBehavior stripBehavior) | 981 return charactersToUInt64(characters8(), m_length, ok); |
| 983 { | 982 return charactersToUInt64(characters16(), m_length, ok); |
| 984 if (is8Bit()) | 983 } |
| 985 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicat
e(isWhiteSpace), stripBehavior); | 984 |
| 986 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(is
WhiteSpace), stripBehavior); | 985 double StringImpl::toDouble(bool* ok) { |
| 987 } | 986 if (is8Bit()) |
| 988 | 987 return charactersToDouble(characters8(), m_length, ok); |
| 989 int StringImpl::toIntStrict(bool* ok, int base) | 988 return charactersToDouble(characters16(), m_length, ok); |
| 990 { | 989 } |
| 991 if (is8Bit()) | 990 |
| 992 return charactersToIntStrict(characters8(), m_length, ok, base); | 991 float StringImpl::toFloat(bool* ok) { |
| 993 return charactersToIntStrict(characters16(), m_length, ok, base); | 992 if (is8Bit()) |
| 994 } | 993 return charactersToFloat(characters8(), m_length, ok); |
| 995 | 994 return charactersToFloat(characters16(), m_length, ok); |
| 996 unsigned StringImpl::toUIntStrict(bool* ok, int base) | |
| 997 { | |
| 998 if (is8Bit()) | |
| 999 return charactersToUIntStrict(characters8(), m_length, ok, base); | |
| 1000 return charactersToUIntStrict(characters16(), m_length, ok, base); | |
| 1001 } | |
| 1002 | |
| 1003 int64_t StringImpl::toInt64Strict(bool* ok, int base) | |
| 1004 { | |
| 1005 if (is8Bit()) | |
| 1006 return charactersToInt64Strict(characters8(), m_length, ok, base); | |
| 1007 return charactersToInt64Strict(characters16(), m_length, ok, base); | |
| 1008 } | |
| 1009 | |
| 1010 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) | |
| 1011 { | |
| 1012 if (is8Bit()) | |
| 1013 return charactersToUInt64Strict(characters8(), m_length, ok, base); | |
| 1014 return charactersToUInt64Strict(characters16(), m_length, ok, base); | |
| 1015 } | |
| 1016 | |
| 1017 int StringImpl::toInt(bool* ok) | |
| 1018 { | |
| 1019 if (is8Bit()) | |
| 1020 return charactersToInt(characters8(), m_length, ok); | |
| 1021 return charactersToInt(characters16(), m_length, ok); | |
| 1022 } | |
| 1023 | |
| 1024 unsigned StringImpl::toUInt(bool* ok) | |
| 1025 { | |
| 1026 if (is8Bit()) | |
| 1027 return charactersToUInt(characters8(), m_length, ok); | |
| 1028 return charactersToUInt(characters16(), m_length, ok); | |
| 1029 } | |
| 1030 | |
| 1031 int64_t StringImpl::toInt64(bool* ok) | |
| 1032 { | |
| 1033 if (is8Bit()) | |
| 1034 return charactersToInt64(characters8(), m_length, ok); | |
| 1035 return charactersToInt64(characters16(), m_length, ok); | |
| 1036 } | |
| 1037 | |
| 1038 uint64_t StringImpl::toUInt64(bool* ok) | |
| 1039 { | |
| 1040 if (is8Bit()) | |
| 1041 return charactersToUInt64(characters8(), m_length, ok); | |
| 1042 return charactersToUInt64(characters16(), m_length, ok); | |
| 1043 } | |
| 1044 | |
| 1045 double StringImpl::toDouble(bool* ok) | |
| 1046 { | |
| 1047 if (is8Bit()) | |
| 1048 return charactersToDouble(characters8(), m_length, ok); | |
| 1049 return charactersToDouble(characters16(), m_length, ok); | |
| 1050 } | |
| 1051 | |
| 1052 float StringImpl::toFloat(bool* ok) | |
| 1053 { | |
| 1054 if (is8Bit()) | |
| 1055 return charactersToFloat(characters8(), m_length, ok); | |
| 1056 return charactersToFloat(characters16(), m_length, ok); | |
| 1057 } | 995 } |
| 1058 | 996 |
| 1059 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt | 997 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt |
| 1060 const UChar StringImpl::latin1CaseFoldTable[256] = { | 998 const UChar StringImpl::latin1CaseFoldTable[256] = { |
| 1061 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x00
09, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, | 999 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x00
09, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, |
| 1062 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x00
19, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, | 1000 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x00
19, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, |
| 1063 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x00
29, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, | 1001 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x00
29, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, |
| 1064 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x00
39, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, | 1002 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x00
39, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, |
| 1065 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x00
69, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, | 1003 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x00
69, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, |
| 1066 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x00
79, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, | 1004 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x00
79, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, |
| 1067 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x00
69, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, | 1005 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x00
69, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, |
| 1068 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x00
79, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, | 1006 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x00
79, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, |
| 1069 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x00
89, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, | 1007 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x00
89, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, |
| 1070 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x00
99, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, | 1008 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x00
99, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, |
| 1071 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00
a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, | 1009 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00
a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, |
| 1072 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00
b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, | 1010 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00
b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, |
| 1073 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00
e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, | 1011 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00
e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, |
| 1074 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, 0x00f8, 0x00
f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, | 1012 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, 0x00f8, 0x00
f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, |
| 1075 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00
e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, | 1013 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00
e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, |
| 1076 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00
f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, | 1014 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00
f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, |
| 1077 }; | 1015 }; |
| 1078 | 1016 |
| 1079 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) | 1017 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) { |
| 1080 { | 1018 while (length--) { |
| 1081 while (length--) { | 1019 if (StringImpl::latin1CaseFoldTable[*a++] != StringImpl::latin1CaseFoldTable
[*b++]) |
| 1082 if (StringImpl::latin1CaseFoldTable[*a++] != StringImpl::latin1CaseFoldT
able[*b++]) | 1020 return false; |
| 1083 return false; | 1021 } |
| 1084 } | 1022 return true; |
| 1085 return true; | 1023 } |
| 1086 } | 1024 |
| 1087 | 1025 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) { |
| 1088 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) | 1026 while (length--) { |
| 1089 { | 1027 if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++]) |
| 1090 while (length--) { | 1028 return false; |
| 1091 if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++]) | 1029 } |
| 1092 return false; | 1030 return true; |
| 1093 } | 1031 } |
| 1094 return true; | 1032 |
| 1095 } | 1033 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
{ |
| 1096 | 1034 if (is8Bit()) |
| 1097 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) | 1035 return WTF::find(characters8(), m_length, matchFunction, start); |
| 1098 { | 1036 return WTF::find(characters16(), m_length, matchFunction, start); |
| 1037 } |
| 1038 |
| 1039 size_t StringImpl::find(const LChar* matchString, unsigned index) { |
| 1040 // Check for null or empty string to match against |
| 1041 if (!matchString) |
| 1042 return kNotFound; |
| 1043 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)); |
| 1044 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); |
| 1045 unsigned matchLength = matchStringLength; |
| 1046 if (!matchLength) |
| 1047 return min(index, length()); |
| 1048 |
| 1049 // Optimization 1: fast case for strings of length 1. |
| 1050 if (matchLength == 1) |
| 1051 return WTF::find(characters16(), length(), *matchString, index); |
| 1052 |
| 1053 // Check index & matchLength are in range. |
| 1054 if (index > length()) |
| 1055 return kNotFound; |
| 1056 unsigned searchLength = length() - index; |
| 1057 if (matchLength > searchLength) |
| 1058 return kNotFound; |
| 1059 // delta is the number of additional times to test; delta == 0 means test only
once. |
| 1060 unsigned delta = searchLength - matchLength; |
| 1061 |
| 1062 const UChar* searchCharacters = characters16() + index; |
| 1063 |
| 1064 // Optimization 2: keep a running hash of the strings, |
| 1065 // only call equal if the hashes match. |
| 1066 unsigned searchHash = 0; |
| 1067 unsigned matchHash = 0; |
| 1068 for (unsigned i = 0; i < matchLength; ++i) { |
| 1069 searchHash += searchCharacters[i]; |
| 1070 matchHash += matchString[i]; |
| 1071 } |
| 1072 |
| 1073 unsigned i = 0; |
| 1074 // keep looping until we match |
| 1075 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, ma
tchLength)) { |
| 1076 if (i == delta) |
| 1077 return kNotFound; |
| 1078 searchHash += searchCharacters[i + matchLength]; |
| 1079 searchHash -= searchCharacters[i]; |
| 1080 ++i; |
| 1081 } |
| 1082 return index + i; |
| 1083 } |
| 1084 |
| 1085 template <typename CharType> |
| 1086 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters,
const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchL
ength) { |
| 1087 // delta is the number of additional times to test; delta == 0 means test only
once. |
| 1088 unsigned delta = searchLength - matchLength; |
| 1089 |
| 1090 unsigned i = 0; |
| 1091 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { |
| 1092 if (i == delta) |
| 1093 return kNotFound; |
| 1094 ++i; |
| 1095 } |
| 1096 return index + i; |
| 1097 } |
| 1098 |
| 1099 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) { |
| 1100 // Check for null or empty string to match against |
| 1101 if (!matchString) |
| 1102 return kNotFound; |
| 1103 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)); |
| 1104 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); |
| 1105 unsigned matchLength = matchStringLength; |
| 1106 if (!matchLength) |
| 1107 return min(index, length()); |
| 1108 |
| 1109 // Check index & matchLength are in range. |
| 1110 if (index > length()) |
| 1111 return kNotFound; |
| 1112 unsigned searchLength = length() - index; |
| 1113 if (matchLength > searchLength) |
| 1114 return kNotFound; |
| 1115 |
| 1116 if (is8Bit()) |
| 1117 return findIgnoringCaseInternal(characters8() + index, matchString, index, s
earchLength, matchLength); |
| 1118 return findIgnoringCaseInternal(characters16() + index, matchString, index, se
archLength, matchLength); |
| 1119 } |
| 1120 |
| 1121 template <typename SearchCharacterType, typename MatchCharacterType> |
| 1122 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharac
ters, const MatchCharacterType* matchCharacters, unsigned index, unsigned search
Length, unsigned matchLength) { |
| 1123 // Optimization: keep a running hash of the strings, |
| 1124 // only call equal() if the hashes match. |
| 1125 |
| 1126 // delta is the number of additional times to test; delta == 0 means test only
once. |
| 1127 unsigned delta = searchLength - matchLength; |
| 1128 |
| 1129 unsigned searchHash = 0; |
| 1130 unsigned matchHash = 0; |
| 1131 |
| 1132 for (unsigned i = 0; i < matchLength; ++i) { |
| 1133 searchHash += searchCharacters[i]; |
| 1134 matchHash += matchCharacters[i]; |
| 1135 } |
| 1136 |
| 1137 unsigned i = 0; |
| 1138 // keep looping until we match |
| 1139 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters
, matchLength)) { |
| 1140 if (i == delta) |
| 1141 return kNotFound; |
| 1142 searchHash += searchCharacters[i + matchLength]; |
| 1143 searchHash -= searchCharacters[i]; |
| 1144 ++i; |
| 1145 } |
| 1146 return index + i; |
| 1147 } |
| 1148 |
| 1149 size_t StringImpl::find(StringImpl* matchString) { |
| 1150 // Check for null string to match against |
| 1151 if (UNLIKELY(!matchString)) |
| 1152 return kNotFound; |
| 1153 unsigned matchLength = matchString->length(); |
| 1154 |
| 1155 // Optimization 1: fast case for strings of length 1. |
| 1156 if (matchLength == 1) { |
| 1157 if (is8Bit()) { |
| 1158 if (matchString->is8Bit()) |
| 1159 return WTF::find(characters8(), length(), matchString->characters8()[0])
; |
| 1160 return WTF::find(characters8(), length(), matchString->characters16()[0]); |
| 1161 } |
| 1162 if (matchString->is8Bit()) |
| 1163 return WTF::find(characters16(), length(), matchString->characters8()[0]); |
| 1164 return WTF::find(characters16(), length(), matchString->characters16()[0]); |
| 1165 } |
| 1166 |
| 1167 // Check matchLength is in range. |
| 1168 if (matchLength > length()) |
| 1169 return kNotFound; |
| 1170 |
| 1171 // Check for empty string to match against |
| 1172 if (UNLIKELY(!matchLength)) |
| 1173 return 0; |
| 1174 |
| 1175 if (is8Bit()) { |
| 1176 if (matchString->is8Bit()) |
| 1177 return findInternal(characters8(), matchString->characters8(), 0, length()
, matchLength); |
| 1178 return findInternal(characters8(), matchString->characters16(), 0, length(),
matchLength); |
| 1179 } |
| 1180 |
| 1181 if (matchString->is8Bit()) |
| 1182 return findInternal(characters16(), matchString->characters8(), 0, length(),
matchLength); |
| 1183 |
| 1184 return findInternal(characters16(), matchString->characters16(), 0, length(),
matchLength); |
| 1185 } |
| 1186 |
| 1187 size_t StringImpl::find(StringImpl* matchString, unsigned index) { |
| 1188 // Check for null or empty string to match against |
| 1189 if (UNLIKELY(!matchString)) |
| 1190 return kNotFound; |
| 1191 |
| 1192 unsigned matchLength = matchString->length(); |
| 1193 |
| 1194 // Optimization 1: fast case for strings of length 1. |
| 1195 if (matchLength == 1) { |
| 1099 if (is8Bit()) | 1196 if (is8Bit()) |
| 1100 return WTF::find(characters8(), m_length, matchFunction, start); | 1197 return WTF::find(characters8(), length(), (*matchString)[0], index); |
| 1101 return WTF::find(characters16(), m_length, matchFunction, start); | 1198 return WTF::find(characters16(), length(), (*matchString)[0], index); |
| 1102 } | 1199 } |
| 1103 | 1200 |
| 1104 size_t StringImpl::find(const LChar* matchString, unsigned index) | 1201 if (UNLIKELY(!matchLength)) |
| 1105 { | 1202 return min(index, length()); |
| 1106 // Check for null or empty string to match against | 1203 |
| 1107 if (!matchString) | 1204 // Check index & matchLength are in range. |
| 1108 return kNotFound; | 1205 if (index > length()) |
| 1109 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)
); | 1206 return kNotFound; |
| 1110 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); | 1207 unsigned searchLength = length() - index; |
| 1111 unsigned matchLength = matchStringLength; | 1208 if (matchLength > searchLength) |
| 1112 if (!matchLength) | 1209 return kNotFound; |
| 1113 return min(index, length()); | 1210 |
| 1114 | 1211 if (is8Bit()) { |
| 1115 // Optimization 1: fast case for strings of length 1. | 1212 if (matchString->is8Bit()) |
| 1116 if (matchLength == 1) | 1213 return findInternal(characters8() + index, matchString->characters8(), ind
ex, searchLength, matchLength); |
| 1117 return WTF::find(characters16(), length(), *matchString, index); | 1214 return findInternal(characters8() + index, matchString->characters16(), inde
x, searchLength, matchLength); |
| 1118 | 1215 } |
| 1119 // Check index & matchLength are in range. | 1216 |
| 1120 if (index > length()) | 1217 if (matchString->is8Bit()) |
| 1121 return kNotFound; | 1218 return findInternal(characters16() + index, matchString->characters8(), inde
x, searchLength, matchLength); |
| 1122 unsigned searchLength = length() - index; | 1219 |
| 1123 if (matchLength > searchLength) | 1220 return findInternal(characters16() + index, matchString->characters16(), index
, searchLength, matchLength); |
| 1124 return kNotFound; | 1221 } |
| 1125 // delta is the number of additional times to test; delta == 0 means test on
ly once. | 1222 |
| 1126 unsigned delta = searchLength - matchLength; | 1223 template <typename SearchCharacterType, typename MatchCharacterType> |
| 1127 | 1224 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* sea
rchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsign
ed searchLength, unsigned matchLength) { |
| 1128 const UChar* searchCharacters = characters16() + index; | 1225 // delta is the number of additional times to test; delta == 0 means test only
once. |
| 1129 | 1226 unsigned delta = searchLength - matchLength; |
| 1130 // Optimization 2: keep a running hash of the strings, | 1227 |
| 1131 // only call equal if the hashes match. | 1228 unsigned i = 0; |
| 1132 unsigned searchHash = 0; | 1229 // keep looping until we match |
| 1133 unsigned matchHash = 0; | 1230 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength))
{ |
| 1134 for (unsigned i = 0; i < matchLength; ++i) { | 1231 if (i == delta) |
| 1135 searchHash += searchCharacters[i]; | 1232 return kNotFound; |
| 1136 matchHash += matchString[i]; | 1233 ++i; |
| 1137 } | 1234 } |
| 1138 | 1235 return index + i; |
| 1139 unsigned i = 0; | 1236 } |
| 1140 // keep looping until we match | 1237 |
| 1141 while (searchHash != matchHash || !equal(searchCharacters + i, matchString,
matchLength)) { | 1238 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) { |
| 1142 if (i == delta) | 1239 // Check for null or empty string to match against |
| 1143 return kNotFound; | 1240 if (!matchString) |
| 1144 searchHash += searchCharacters[i + matchLength]; | 1241 return kNotFound; |
| 1145 searchHash -= searchCharacters[i]; | 1242 unsigned matchLength = matchString->length(); |
| 1146 ++i; | 1243 if (!matchLength) |
| 1147 } | 1244 return min(index, length()); |
| 1148 return index + i; | 1245 |
| 1149 } | 1246 // Check index & matchLength are in range. |
| 1150 | 1247 if (index > length()) |
| 1151 template<typename CharType> | 1248 return kNotFound; |
| 1152 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters,
const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchL
ength) | 1249 unsigned searchLength = length() - index; |
| 1153 { | 1250 if (matchLength > searchLength) |
| 1154 // delta is the number of additional times to test; delta == 0 means test on
ly once. | 1251 return kNotFound; |
| 1155 unsigned delta = searchLength - matchLength; | 1252 |
| 1156 | 1253 if (is8Bit()) { |
| 1157 unsigned i = 0; | 1254 if (matchString->is8Bit()) |
| 1158 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { | 1255 return findIgnoringCaseInner(characters8() + index, matchString->character
s8(), index, searchLength, matchLength); |
| 1159 if (i == delta) | 1256 return findIgnoringCaseInner(characters8() + index, matchString->characters1
6(), index, searchLength, matchLength); |
| 1160 return kNotFound; | 1257 } |
| 1161 ++i; | 1258 |
| 1162 } | 1259 if (matchString->is8Bit()) |
| 1163 return index + i; | 1260 return findIgnoringCaseInner(characters16() + index, matchString->characters
8(), index, searchLength, matchLength); |
| 1164 } | 1261 |
| 1165 | 1262 return findIgnoringCaseInner(characters16() + index, matchString->characters16
(), index, searchLength, matchLength); |
| 1166 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) | 1263 } |
| 1167 { | 1264 |
| 1168 // Check for null or empty string to match against | 1265 size_t StringImpl::findNextLineStart(unsigned index) { |
| 1169 if (!matchString) | 1266 if (is8Bit()) |
| 1170 return kNotFound; | 1267 return WTF::findNextLineStart(characters8(), m_length, index); |
| 1171 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)
); | 1268 return WTF::findNextLineStart(characters16(), m_length, index); |
| 1172 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); | 1269 } |
| 1173 unsigned matchLength = matchStringLength; | 1270 |
| 1174 if (!matchLength) | 1271 size_t StringImpl::count(LChar c) const { |
| 1175 return min(index, length()); | 1272 int count = 0; |
| 1176 | 1273 if (is8Bit()) { |
| 1177 // Check index & matchLength are in range. | 1274 for (size_t i = 0; i < m_length; ++i) |
| 1178 if (index > length()) | 1275 count += characters8()[i] == c; |
| 1179 return kNotFound; | 1276 } else { |
| 1180 unsigned searchLength = length() - index; | 1277 for (size_t i = 0; i < m_length; ++i) |
| 1181 if (matchLength > searchLength) | 1278 count += characters16()[i] == c; |
| 1182 return kNotFound; | 1279 } |
| 1183 | 1280 return count; |
| 1281 } |
| 1282 |
| 1283 size_t StringImpl::reverseFind(UChar c, unsigned index) { |
| 1284 if (is8Bit()) |
| 1285 return WTF::reverseFind(characters8(), m_length, c, index); |
| 1286 return WTF::reverseFind(characters16(), m_length, c, index); |
| 1287 } |
| 1288 |
| 1289 template <typename SearchCharacterType, typename MatchCharacterType> |
| 1290 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCh
aracters, const MatchCharacterType* matchCharacters, unsigned index, unsigned le
ngth, unsigned matchLength) { |
| 1291 // Optimization: keep a running hash of the strings, |
| 1292 // only call equal if the hashes match. |
| 1293 |
| 1294 // delta is the number of additional times to test; delta == 0 means test only
once. |
| 1295 unsigned delta = min(index, length - matchLength); |
| 1296 |
| 1297 unsigned searchHash = 0; |
| 1298 unsigned matchHash = 0; |
| 1299 for (unsigned i = 0; i < matchLength; ++i) { |
| 1300 searchHash += searchCharacters[delta + i]; |
| 1301 matchHash += matchCharacters[i]; |
| 1302 } |
| 1303 |
| 1304 // keep looping until we match |
| 1305 while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharac
ters, matchLength)) { |
| 1306 if (!delta) |
| 1307 return kNotFound; |
| 1308 --delta; |
| 1309 searchHash -= searchCharacters[delta + matchLength]; |
| 1310 searchHash += searchCharacters[delta]; |
| 1311 } |
| 1312 return delta; |
| 1313 } |
| 1314 |
| 1315 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) { |
| 1316 // Check for null or empty string to match against |
| 1317 if (!matchString) |
| 1318 return kNotFound; |
| 1319 unsigned matchLength = matchString->length(); |
| 1320 unsigned ourLength = length(); |
| 1321 if (!matchLength) |
| 1322 return min(index, ourLength); |
| 1323 |
| 1324 // Optimization 1: fast case for strings of length 1. |
| 1325 if (matchLength == 1) { |
| 1184 if (is8Bit()) | 1326 if (is8Bit()) |
| 1185 return findIgnoringCaseInternal(characters8() + index, matchString, inde
x, searchLength, matchLength); | 1327 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index
); |
| 1186 return findIgnoringCaseInternal(characters16() + index, matchString, index,
searchLength, matchLength); | 1328 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index)
; |
| 1329 } |
| 1330 |
| 1331 // Check index & matchLength are in range. |
| 1332 if (matchLength > ourLength) |
| 1333 return kNotFound; |
| 1334 |
| 1335 if (is8Bit()) { |
| 1336 if (matchString->is8Bit()) |
| 1337 return reverseFindInner(characters8(), matchString->characters8(), index,
ourLength, matchLength); |
| 1338 return reverseFindInner(characters8(), matchString->characters16(), index, o
urLength, matchLength); |
| 1339 } |
| 1340 |
| 1341 if (matchString->is8Bit()) |
| 1342 return reverseFindInner(characters16(), matchString->characters8(), index, o
urLength, matchLength); |
| 1343 |
| 1344 return reverseFindInner(characters16(), matchString->characters16(), index, ou
rLength, matchLength); |
| 1187 } | 1345 } |
| 1188 | 1346 |
| 1189 template <typename SearchCharacterType, typename MatchCharacterType> | 1347 template <typename SearchCharacterType, typename MatchCharacterType> |
| 1190 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharac
ters, const MatchCharacterType* matchCharacters, unsigned index, unsigned search
Length, unsigned matchLength) | 1348 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterTy
pe* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index,
unsigned length, unsigned matchLength) { |
| 1191 { | 1349 // delta is the number of additional times to test; delta == 0 means test only
once. |
| 1192 // Optimization: keep a running hash of the strings, | 1350 unsigned delta = min(index, length - matchLength); |
| 1193 // only call equal() if the hashes match. | 1351 |
| 1194 | 1352 // keep looping until we match |
| 1195 // delta is the number of additional times to test; delta == 0 means test on
ly once. | 1353 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLeng
th)) { |
| 1196 unsigned delta = searchLength - matchLength; | 1354 if (!delta) |
| 1197 | 1355 return kNotFound; |
| 1198 unsigned searchHash = 0; | 1356 --delta; |
| 1199 unsigned matchHash = 0; | 1357 } |
| 1200 | 1358 return delta; |
| 1201 for (unsigned i = 0; i < matchLength; ++i) { | 1359 } |
| 1202 searchHash += searchCharacters[i]; | 1360 |
| 1203 matchHash += matchCharacters[i]; | 1361 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned ind
ex) { |
| 1204 } | 1362 // Check for null or empty string to match against |
| 1205 | 1363 if (!matchString) |
| 1206 unsigned i = 0; | 1364 return kNotFound; |
| 1207 // keep looping until we match | 1365 unsigned matchLength = matchString->length(); |
| 1208 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacte
rs, matchLength)) { | 1366 unsigned ourLength = length(); |
| 1209 if (i == delta) | 1367 if (!matchLength) |
| 1210 return kNotFound; | 1368 return min(index, ourLength); |
| 1211 searchHash += searchCharacters[i + matchLength]; | 1369 |
| 1212 searchHash -= searchCharacters[i]; | 1370 // Check index & matchLength are in range. |
| 1213 ++i; | 1371 if (matchLength > ourLength) |
| 1214 } | 1372 return kNotFound; |
| 1215 return index + i; | 1373 |
| 1216 } | 1374 if (is8Bit()) { |
| 1217 | |
| 1218 size_t StringImpl::find(StringImpl* matchString) | |
| 1219 { | |
| 1220 // Check for null string to match against | |
| 1221 if (UNLIKELY(!matchString)) | |
| 1222 return kNotFound; | |
| 1223 unsigned matchLength = matchString->length(); | |
| 1224 | |
| 1225 // Optimization 1: fast case for strings of length 1. | |
| 1226 if (matchLength == 1) { | |
| 1227 if (is8Bit()) { | |
| 1228 if (matchString->is8Bit()) | |
| 1229 return WTF::find(characters8(), length(), matchString->character
s8()[0]); | |
| 1230 return WTF::find(characters8(), length(), matchString->characters16(
)[0]); | |
| 1231 } | |
| 1232 if (matchString->is8Bit()) | |
| 1233 return WTF::find(characters16(), length(), matchString->characters8(
)[0]); | |
| 1234 return WTF::find(characters16(), length(), matchString->characters16()[0
]); | |
| 1235 } | |
| 1236 | |
| 1237 // Check matchLength is in range. | |
| 1238 if (matchLength > length()) | |
| 1239 return kNotFound; | |
| 1240 | |
| 1241 // Check for empty string to match against | |
| 1242 if (UNLIKELY(!matchLength)) | |
| 1243 return 0; | |
| 1244 | |
| 1245 if (is8Bit()) { | |
| 1246 if (matchString->is8Bit()) | |
| 1247 return findInternal(characters8(), matchString->characters8(), 0, le
ngth(), matchLength); | |
| 1248 return findInternal(characters8(), matchString->characters16(), 0, lengt
h(), matchLength); | |
| 1249 } | |
| 1250 | |
| 1251 if (matchString->is8Bit()) | 1375 if (matchString->is8Bit()) |
| 1252 return findInternal(characters16(), matchString->characters8(), 0, lengt
h(), matchLength); | 1376 return reverseFindIgnoringCaseInner(characters8(), matchString->characters
8(), index, ourLength, matchLength); |
| 1253 | 1377 return reverseFindIgnoringCaseInner(characters8(), matchString->characters16
(), index, ourLength, matchLength); |
| 1254 return findInternal(characters16(), matchString->characters16(), 0, length()
, matchLength); | 1378 } |
| 1255 } | 1379 |
| 1256 | 1380 if (matchString->is8Bit()) |
| 1257 size_t StringImpl::find(StringImpl* matchString, unsigned index) | 1381 return reverseFindIgnoringCaseInner(characters16(), matchString->characters8
(), index, ourLength, matchLength); |
| 1258 { | 1382 |
| 1259 // Check for null or empty string to match against | 1383 return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(
), index, ourLength, matchLength); |
| 1260 if (UNLIKELY(!matchString)) | 1384 } |
| 1261 return kNotFound; | 1385 |
| 1262 | 1386 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned star
tOffset, const char* matchString, unsigned matchLength, TextCaseSensitivity case
Sensitivity) { |
| 1263 unsigned matchLength = matchString->length(); | 1387 ASSERT(stringImpl); |
| 1264 | 1388 ASSERT(matchLength <= stringImpl->length()); |
| 1265 // Optimization 1: fast case for strings of length 1. | 1389 ASSERT(startOffset + matchLength <= stringImpl->length()); |
| 1266 if (matchLength == 1) { | 1390 |
| 1267 if (is8Bit()) | 1391 if (caseSensitivity == TextCaseSensitive) { |
| 1268 return WTF::find(characters8(), length(), (*matchString)[0], index); | |
| 1269 return WTF::find(characters16(), length(), (*matchString)[0], index); | |
| 1270 } | |
| 1271 | |
| 1272 if (UNLIKELY(!matchLength)) | |
| 1273 return min(index, length()); | |
| 1274 | |
| 1275 // Check index & matchLength are in range. | |
| 1276 if (index > length()) | |
| 1277 return kNotFound; | |
| 1278 unsigned searchLength = length() - index; | |
| 1279 if (matchLength > searchLength) | |
| 1280 return kNotFound; | |
| 1281 | |
| 1282 if (is8Bit()) { | |
| 1283 if (matchString->is8Bit()) | |
| 1284 return findInternal(characters8() + index, matchString->characters8(
), index, searchLength, matchLength); | |
| 1285 return findInternal(characters8() + index, matchString->characters16(),
index, searchLength, matchLength); | |
| 1286 } | |
| 1287 | |
| 1288 if (matchString->is8Bit()) | |
| 1289 return findInternal(characters16() + index, matchString->characters8(),
index, searchLength, matchLength); | |
| 1290 | |
| 1291 return findInternal(characters16() + index, matchString->characters16(), ind
ex, searchLength, matchLength); | |
| 1292 } | |
| 1293 | |
| 1294 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1295 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* sea
rchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsign
ed searchLength, unsigned matchLength) | |
| 1296 { | |
| 1297 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 1298 unsigned delta = searchLength - matchLength; | |
| 1299 | |
| 1300 unsigned i = 0; | |
| 1301 // keep looping until we match | |
| 1302 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength
)) { | |
| 1303 if (i == delta) | |
| 1304 return kNotFound; | |
| 1305 ++i; | |
| 1306 } | |
| 1307 return index + i; | |
| 1308 } | |
| 1309 | |
| 1310 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) | |
| 1311 { | |
| 1312 // Check for null or empty string to match against | |
| 1313 if (!matchString) | |
| 1314 return kNotFound; | |
| 1315 unsigned matchLength = matchString->length(); | |
| 1316 if (!matchLength) | |
| 1317 return min(index, length()); | |
| 1318 | |
| 1319 // Check index & matchLength are in range. | |
| 1320 if (index > length()) | |
| 1321 return kNotFound; | |
| 1322 unsigned searchLength = length() - index; | |
| 1323 if (matchLength > searchLength) | |
| 1324 return kNotFound; | |
| 1325 | |
| 1326 if (is8Bit()) { | |
| 1327 if (matchString->is8Bit()) | |
| 1328 return findIgnoringCaseInner(characters8() + index, matchString->cha
racters8(), index, searchLength, matchLength); | |
| 1329 return findIgnoringCaseInner(characters8() + index, matchString->charact
ers16(), index, searchLength, matchLength); | |
| 1330 } | |
| 1331 | |
| 1332 if (matchString->is8Bit()) | |
| 1333 return findIgnoringCaseInner(characters16() + index, matchString->charac
ters8(), index, searchLength, matchLength); | |
| 1334 | |
| 1335 return findIgnoringCaseInner(characters16() + index, matchString->characters
16(), index, searchLength, matchLength); | |
| 1336 } | |
| 1337 | |
| 1338 size_t StringImpl::findNextLineStart(unsigned index) | |
| 1339 { | |
| 1340 if (is8Bit()) | |
| 1341 return WTF::findNextLineStart(characters8(), m_length, index); | |
| 1342 return WTF::findNextLineStart(characters16(), m_length, index); | |
| 1343 } | |
| 1344 | |
| 1345 size_t StringImpl::count(LChar c) const | |
| 1346 { | |
| 1347 int count = 0; | |
| 1348 if (is8Bit()) { | |
| 1349 for (size_t i = 0; i < m_length; ++i) | |
| 1350 count += characters8()[i] == c; | |
| 1351 } else { | |
| 1352 for (size_t i = 0; i < m_length; ++i) | |
| 1353 count += characters16()[i] == c; | |
| 1354 } | |
| 1355 return count; | |
| 1356 } | |
| 1357 | |
| 1358 size_t StringImpl::reverseFind(UChar c, unsigned index) | |
| 1359 { | |
| 1360 if (is8Bit()) | |
| 1361 return WTF::reverseFind(characters8(), m_length, c, index); | |
| 1362 return WTF::reverseFind(characters16(), m_length, c, index); | |
| 1363 } | |
| 1364 | |
| 1365 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1366 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCh
aracters, const MatchCharacterType* matchCharacters, unsigned index, unsigned le
ngth, unsigned matchLength) | |
| 1367 { | |
| 1368 // Optimization: keep a running hash of the strings, | |
| 1369 // only call equal if the hashes match. | |
| 1370 | |
| 1371 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 1372 unsigned delta = min(index, length - matchLength); | |
| 1373 | |
| 1374 unsigned searchHash = 0; | |
| 1375 unsigned matchHash = 0; | |
| 1376 for (unsigned i = 0; i < matchLength; ++i) { | |
| 1377 searchHash += searchCharacters[delta + i]; | |
| 1378 matchHash += matchCharacters[i]; | |
| 1379 } | |
| 1380 | |
| 1381 // keep looping until we match | |
| 1382 while (searchHash != matchHash || !equal(searchCharacters + delta, matchChar
acters, matchLength)) { | |
| 1383 if (!delta) | |
| 1384 return kNotFound; | |
| 1385 --delta; | |
| 1386 searchHash -= searchCharacters[delta + matchLength]; | |
| 1387 searchHash += searchCharacters[delta]; | |
| 1388 } | |
| 1389 return delta; | |
| 1390 } | |
| 1391 | |
| 1392 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) | |
| 1393 { | |
| 1394 // Check for null or empty string to match against | |
| 1395 if (!matchString) | |
| 1396 return kNotFound; | |
| 1397 unsigned matchLength = matchString->length(); | |
| 1398 unsigned ourLength = length(); | |
| 1399 if (!matchLength) | |
| 1400 return min(index, ourLength); | |
| 1401 | |
| 1402 // Optimization 1: fast case for strings of length 1. | |
| 1403 if (matchLength == 1) { | |
| 1404 if (is8Bit()) | |
| 1405 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0],
index); | |
| 1406 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], in
dex); | |
| 1407 } | |
| 1408 | |
| 1409 // Check index & matchLength are in range. | |
| 1410 if (matchLength > ourLength) | |
| 1411 return kNotFound; | |
| 1412 | |
| 1413 if (is8Bit()) { | |
| 1414 if (matchString->is8Bit()) | |
| 1415 return reverseFindInner(characters8(), matchString->characters8(), i
ndex, ourLength, matchLength); | |
| 1416 return reverseFindInner(characters8(), matchString->characters16(), inde
x, ourLength, matchLength); | |
| 1417 } | |
| 1418 | |
| 1419 if (matchString->is8Bit()) | |
| 1420 return reverseFindInner(characters16(), matchString->characters8(), inde
x, ourLength, matchLength); | |
| 1421 | |
| 1422 return reverseFindInner(characters16(), matchString->characters16(), index,
ourLength, matchLength); | |
| 1423 } | |
| 1424 | |
| 1425 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1426 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterTy
pe* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index,
unsigned length, unsigned matchLength) | |
| 1427 { | |
| 1428 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 1429 unsigned delta = min(index, length - matchLength); | |
| 1430 | |
| 1431 // keep looping until we match | |
| 1432 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLe
ngth)) { | |
| 1433 if (!delta) | |
| 1434 return kNotFound; | |
| 1435 --delta; | |
| 1436 } | |
| 1437 return delta; | |
| 1438 } | |
| 1439 | |
| 1440 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned ind
ex) | |
| 1441 { | |
| 1442 // Check for null or empty string to match against | |
| 1443 if (!matchString) | |
| 1444 return kNotFound; | |
| 1445 unsigned matchLength = matchString->length(); | |
| 1446 unsigned ourLength = length(); | |
| 1447 if (!matchLength) | |
| 1448 return min(index, ourLength); | |
| 1449 | |
| 1450 // Check index & matchLength are in range. | |
| 1451 if (matchLength > ourLength) | |
| 1452 return kNotFound; | |
| 1453 | |
| 1454 if (is8Bit()) { | |
| 1455 if (matchString->is8Bit()) | |
| 1456 return reverseFindIgnoringCaseInner(characters8(), matchString->char
acters8(), index, ourLength, matchLength); | |
| 1457 return reverseFindIgnoringCaseInner(characters8(), matchString->characte
rs16(), index, ourLength, matchLength); | |
| 1458 } | |
| 1459 | |
| 1460 if (matchString->is8Bit()) | |
| 1461 return reverseFindIgnoringCaseInner(characters16(), matchString->charact
ers8(), index, ourLength, matchLength); | |
| 1462 | |
| 1463 return reverseFindIgnoringCaseInner(characters16(), matchString->characters1
6(), index, ourLength, matchLength); | |
| 1464 } | |
| 1465 | |
| 1466 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned star
tOffset, const char* matchString, unsigned matchLength, TextCaseSensitivity case
Sensitivity) | |
| 1467 { | |
| 1468 ASSERT(stringImpl); | |
| 1469 ASSERT(matchLength <= stringImpl->length()); | |
| 1470 ASSERT(startOffset + matchLength <= stringImpl->length()); | |
| 1471 | |
| 1472 if (caseSensitivity == TextCaseSensitive) { | |
| 1473 if (stringImpl->is8Bit()) | |
| 1474 return equal(stringImpl->characters8() + startOffset, reinterpret_ca
st<const LChar*>(matchString), matchLength); | |
| 1475 return equal(stringImpl->characters16() + startOffset, reinterpret_cast<
const LChar*>(matchString), matchLength); | |
| 1476 } | |
| 1477 if (stringImpl->is8Bit()) | 1392 if (stringImpl->is8Bit()) |
| 1478 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinte
rpret_cast<const LChar*>(matchString), matchLength); | 1393 return equal(stringImpl->characters8() + startOffset, reinterpret_cast<con
st LChar*>(matchString), matchLength); |
| 1479 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpr
et_cast<const LChar*>(matchString), matchLength); | 1394 return equal(stringImpl->characters16() + startOffset, reinterpret_cast<cons
t LChar*>(matchString), matchLength); |
| 1480 } | 1395 } |
| 1481 | 1396 if (stringImpl->is8Bit()) |
| 1482 bool StringImpl::startsWith(UChar character) const | 1397 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpre
t_cast<const LChar*>(matchString), matchLength); |
| 1483 { | 1398 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret
_cast<const LChar*>(matchString), matchLength); |
| 1484 return m_length && (*this)[0] == character; | 1399 } |
| 1485 } | 1400 |
| 1486 | 1401 bool StringImpl::startsWith(UChar character) const { |
| 1487 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, TextC
aseSensitivity caseSensitivity) const | 1402 return m_length && (*this)[0] == character; |
| 1488 { | 1403 } |
| 1489 ASSERT(matchLength); | 1404 |
| 1490 if (matchLength > length()) | 1405 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, TextC
aseSensitivity caseSensitivity) const { |
| 1491 return false; | 1406 ASSERT(matchLength); |
| 1492 return equalInner(this, 0, matchString, matchLength, caseSensitivity); | 1407 if (matchLength > length()) |
| 1493 } | |
| 1494 | |
| 1495 bool StringImpl::endsWith(StringImpl* matchString, TextCaseSensitivity caseSensi
tivity) | |
| 1496 { | |
| 1497 ASSERT(matchString); | |
| 1498 if (m_length >= matchString->m_length) { | |
| 1499 unsigned start = m_length - matchString->m_length; | |
| 1500 if (caseSensitivity == TextCaseSensitive) | |
| 1501 return find(matchString, start) == start; | |
| 1502 return findIgnoringCase(matchString, start) == start; | |
| 1503 } | |
| 1504 return false; | 1408 return false; |
| 1505 } | 1409 return equalInner(this, 0, matchString, matchLength, caseSensitivity); |
| 1506 | 1410 } |
| 1507 bool StringImpl::endsWith(UChar character) const | 1411 |
| 1508 { | 1412 bool StringImpl::endsWith(StringImpl* matchString, TextCaseSensitivity caseSensi
tivity) { |
| 1509 return m_length && (*this)[m_length - 1] == character; | 1413 ASSERT(matchString); |
| 1510 } | 1414 if (m_length >= matchString->m_length) { |
| 1511 | 1415 unsigned start = m_length - matchString->m_length; |
| 1512 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, TextCas
eSensitivity caseSensitivity) const | 1416 if (caseSensitivity == TextCaseSensitive) |
| 1513 { | 1417 return find(matchString, start) == start; |
| 1514 ASSERT(matchLength); | 1418 return findIgnoringCase(matchString, start) == start; |
| 1515 if (matchLength > length()) | 1419 } |
| 1516 return false; | 1420 return false; |
| 1517 unsigned startOffset = length() - matchLength; | 1421 } |
| 1518 return equalInner(this, startOffset, matchString, matchLength, caseSensitivi
ty); | 1422 |
| 1519 } | 1423 bool StringImpl::endsWith(UChar character) const { |
| 1520 | 1424 return m_length && (*this)[m_length - 1] == character; |
| 1521 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) | 1425 } |
| 1522 { | 1426 |
| 1523 if (oldC == newC) | 1427 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, TextCas
eSensitivity caseSensitivity) const { |
| 1524 return this; | 1428 ASSERT(matchLength); |
| 1525 | 1429 if (matchLength > length()) |
| 1526 if (find(oldC) == kNotFound) | 1430 return false; |
| 1527 return this; | 1431 unsigned startOffset = length() - matchLength; |
| 1528 | 1432 return equalInner(this, startOffset, matchString, matchLength, caseSensitivity
); |
| 1529 unsigned i; | 1433 } |
| 1530 if (is8Bit()) { | 1434 |
| 1531 if (newC <= 0xff) { | 1435 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) { |
| 1532 LChar* data; | 1436 if (oldC == newC) |
| 1533 LChar oldChar = static_cast<LChar>(oldC); | 1437 return this; |
| 1534 LChar newChar = static_cast<LChar>(newC); | 1438 |
| 1535 | 1439 if (find(oldC) == kNotFound) |
| 1536 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | 1440 return this; |
| 1537 | 1441 |
| 1538 for (i = 0; i != m_length; ++i) { | 1442 unsigned i; |
| 1539 LChar ch = characters8()[i]; | 1443 if (is8Bit()) { |
| 1540 if (ch == oldChar) | 1444 if (newC <= 0xff) { |
| 1541 ch = newChar; | 1445 LChar* data; |
| 1542 data[i] = ch; | 1446 LChar oldChar = static_cast<LChar>(oldC); |
| 1543 } | 1447 LChar newChar = static_cast<LChar>(newC); |
| 1544 return newImpl.release(); | 1448 |
| 1545 } | 1449 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 1546 | 1450 |
| 1547 // There is the possibility we need to up convert from 8 to 16 bit, | 1451 for (i = 0; i != m_length; ++i) { |
| 1548 // create a 16 bit string for the result. | 1452 LChar ch = characters8()[i]; |
| 1549 UChar* data; | 1453 if (ch == oldChar) |
| 1550 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | 1454 ch = newChar; |
| 1551 | 1455 data[i] = ch; |
| 1552 for (i = 0; i != m_length; ++i) { | 1456 } |
| 1553 UChar ch = characters8()[i]; | 1457 return newImpl.release(); |
| 1554 if (ch == oldC) | 1458 } |
| 1555 ch = newC; | 1459 |
| 1556 data[i] = ch; | 1460 // There is the possibility we need to up convert from 8 to 16 bit, |
| 1557 } | 1461 // create a 16 bit string for the result. |
| 1558 | |
| 1559 return newImpl.release(); | |
| 1560 } | |
| 1561 | |
| 1562 UChar* data; | 1462 UChar* data; |
| 1563 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | 1463 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 1564 | 1464 |
| 1565 for (i = 0; i != m_length; ++i) { | 1465 for (i = 0; i != m_length; ++i) { |
| 1566 UChar ch = characters16()[i]; | 1466 UChar ch = characters8()[i]; |
| 1567 if (ch == oldC) | 1467 if (ch == oldC) |
| 1568 ch = newC; | 1468 ch = newC; |
| 1569 data[i] = ch; | 1469 data[i] = ch; |
| 1570 } | 1470 } |
| 1471 |
| 1571 return newImpl.release(); | 1472 return newImpl.release(); |
| 1572 } | 1473 } |
| 1573 | 1474 |
| 1574 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToR
eplace, StringImpl* str) | 1475 UChar* data; |
| 1575 { | 1476 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 1576 position = min(position, length()); | 1477 |
| 1577 lengthToReplace = min(lengthToReplace, length() - position); | 1478 for (i = 0; i != m_length; ++i) { |
| 1578 unsigned lengthToInsert = str ? str->length() : 0; | 1479 UChar ch = characters16()[i]; |
| 1579 if (!lengthToReplace && !lengthToInsert) | 1480 if (ch == oldC) |
| 1580 return this; | 1481 ch = newC; |
| 1581 | 1482 data[i] = ch; |
| 1582 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max
() - lengthToInsert)); | 1483 } |
| 1583 | 1484 return newImpl.release(); |
| 1584 if (is8Bit() && (!str || str->is8Bit())) { | 1485 } |
| 1585 LChar* data; | 1486 |
| 1586 RefPtr<StringImpl> newImpl = | 1487 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToR
eplace, StringImpl* str) { |
| 1587 createUninitialized(length() - lengthToReplace + lengthToInsert, data); | 1488 position = min(position, length()); |
| 1588 memcpy(data, characters8(), position * sizeof(LChar)); | 1489 lengthToReplace = min(lengthToReplace, length() - position); |
| 1589 if (str) | 1490 unsigned lengthToInsert = str ? str->length() : 0; |
| 1590 memcpy(data + position, str->characters8(), lengthToInsert * sizeof(
LChar)); | 1491 if (!lengthToReplace && !lengthToInsert) |
| 1591 memcpy(data + position + lengthToInsert, characters8() + position + leng
thToReplace, | 1492 return this; |
| 1592 (length() - position - lengthToReplace) * sizeof(LChar)); | 1493 |
| 1593 return newImpl.release(); | 1494 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max()
- lengthToInsert)); |
| 1594 } | 1495 |
| 1595 UChar* data; | 1496 if (is8Bit() && (!str || str->is8Bit())) { |
| 1497 LChar* data; |
| 1596 RefPtr<StringImpl> newImpl = | 1498 RefPtr<StringImpl> newImpl = |
| 1597 createUninitialized(length() - lengthToReplace + lengthToInsert, data); | 1499 createUninitialized(length() - lengthToReplace + lengthToInsert, data); |
| 1598 if (is8Bit()) | 1500 memcpy(data, characters8(), position * sizeof(LChar)); |
| 1599 for (unsigned i = 0; i < position; ++i) | 1501 if (str) |
| 1600 data[i] = characters8()[i]; | 1502 memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar)
); |
| 1503 memcpy(data + position + lengthToInsert, characters8() + position + lengthTo
Replace, |
| 1504 (length() - position - lengthToReplace) * sizeof(LChar)); |
| 1505 return newImpl.release(); |
| 1506 } |
| 1507 UChar* data; |
| 1508 RefPtr<StringImpl> newImpl = |
| 1509 createUninitialized(length() - lengthToReplace + lengthToInsert, data); |
| 1510 if (is8Bit()) |
| 1511 for (unsigned i = 0; i < position; ++i) |
| 1512 data[i] = characters8()[i]; |
| 1513 else |
| 1514 memcpy(data, characters16(), position * sizeof(UChar)); |
| 1515 if (str) { |
| 1516 if (str->is8Bit()) |
| 1517 for (unsigned i = 0; i < lengthToInsert; ++i) |
| 1518 data[i + position] = str->characters8()[i]; |
| 1601 else | 1519 else |
| 1602 memcpy(data, characters16(), position * sizeof(UChar)); | 1520 memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar
)); |
| 1603 if (str) { | 1521 } |
| 1604 if (str->is8Bit()) | 1522 if (is8Bit()) { |
| 1605 for (unsigned i = 0; i < lengthToInsert; ++i) | 1523 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i) |
| 1606 data[i + position] = str->characters8()[i]; | 1524 data[i + position + lengthToInsert] = characters8()[i + position + lengthT
oReplace]; |
| 1607 else | 1525 } else { |
| 1608 memcpy(data + position, str->characters16(), lengthToInsert * sizeof
(UChar)); | 1526 memcpy(data + position + lengthToInsert, characters16() + position + lengthT
oReplace, |
| 1609 } | 1527 (length() - position - lengthToReplace) * sizeof(UChar)); |
| 1610 if (is8Bit()) { | 1528 } |
| 1611 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i) | 1529 return newImpl.release(); |
| 1612 data[i + position + lengthToInsert] = characters8()[i + position + l
engthToReplace]; | 1530 } |
| 1613 } else { | 1531 |
| 1614 memcpy(data + position + lengthToInsert, characters16() + position + len
gthToReplace, | 1532 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen
t) { |
| 1615 (length() - position - lengthToReplace) * sizeof(UChar)); | 1533 if (!replacement) |
| 1616 } | 1534 return this; |
| 1535 |
| 1536 if (replacement->is8Bit()) |
| 1537 return replace(pattern, replacement->characters8(), replacement->length()); |
| 1538 |
| 1539 return replace(pattern, replacement->characters16(), replacement->length()); |
| 1540 } |
| 1541 |
| 1542 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replaceme
nt, unsigned repStrLength) { |
| 1543 ASSERT(replacement); |
| 1544 |
| 1545 size_t srcSegmentStart = 0; |
| 1546 unsigned matchCount = 0; |
| 1547 |
| 1548 // Count the matches. |
| 1549 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1550 ++matchCount; |
| 1551 ++srcSegmentStart; |
| 1552 } |
| 1553 |
| 1554 // If we have 0 matches then we don't have to do any more work. |
| 1555 if (!matchCount) |
| 1556 return this; |
| 1557 |
| 1558 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max()
/ repStrLength); |
| 1559 |
| 1560 unsigned replaceSize = matchCount * repStrLength; |
| 1561 unsigned newSize = m_length - matchCount; |
| 1562 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); |
| 1563 |
| 1564 newSize += replaceSize; |
| 1565 |
| 1566 // Construct the new data. |
| 1567 size_t srcSegmentEnd; |
| 1568 unsigned srcSegmentLength; |
| 1569 srcSegmentStart = 0; |
| 1570 unsigned dstOffset = 0; |
| 1571 |
| 1572 if (is8Bit()) { |
| 1573 LChar* data; |
| 1574 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
| 1575 |
| 1576 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1577 srcSegmentLength = srcSegmentEnd - srcSegmentStart; |
| 1578 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength
* sizeof(LChar)); |
| 1579 dstOffset += srcSegmentLength; |
| 1580 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar)); |
| 1581 dstOffset += repStrLength; |
| 1582 srcSegmentStart = srcSegmentEnd + 1; |
| 1583 } |
| 1584 |
| 1585 srcSegmentLength = m_length - srcSegmentStart; |
| 1586 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength *
sizeof(LChar)); |
| 1587 |
| 1588 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 1589 |
| 1617 return newImpl.release(); | 1590 return newImpl.release(); |
| 1618 } | 1591 } |
| 1619 | 1592 |
| 1620 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen
t) | 1593 UChar* data; |
| 1621 { | 1594 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
| 1622 if (!replacement) | 1595 |
| 1623 return this; | 1596 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1624 | 1597 srcSegmentLength = srcSegmentEnd - srcSegmentStart; |
| 1625 if (replacement->is8Bit()) | 1598 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength
* sizeof(UChar)); |
| 1626 return replace(pattern, replacement->characters8(), replacement->length(
)); | 1599 |
| 1627 | 1600 dstOffset += srcSegmentLength; |
| 1628 return replace(pattern, replacement->characters16(), replacement->length()); | 1601 for (unsigned i = 0; i < repStrLength; ++i) |
| 1629 } | 1602 data[i + dstOffset] = replacement[i]; |
| 1630 | 1603 |
| 1631 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replaceme
nt, unsigned repStrLength) | 1604 dstOffset += repStrLength; |
| 1632 { | 1605 srcSegmentStart = srcSegmentEnd + 1; |
| 1633 ASSERT(replacement); | 1606 } |
| 1634 | 1607 |
| 1635 size_t srcSegmentStart = 0; | 1608 srcSegmentLength = m_length - srcSegmentStart; |
| 1636 unsigned matchCount = 0; | 1609 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength *
sizeof(UChar)); |
| 1637 | 1610 |
| 1638 // Count the matches. | 1611 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 1639 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { | 1612 |
| 1640 ++matchCount; | 1613 return newImpl.release(); |
| 1641 ++srcSegmentStart; | 1614 } |
| 1642 } | 1615 |
| 1643 | 1616 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replaceme
nt, unsigned repStrLength) { |
| 1644 // If we have 0 matches then we don't have to do any more work. | 1617 ASSERT(replacement); |
| 1645 if (!matchCount) | 1618 |
| 1646 return this; | 1619 size_t srcSegmentStart = 0; |
| 1647 | 1620 unsigned matchCount = 0; |
| 1648 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max(
) / repStrLength); | 1621 |
| 1649 | 1622 // Count the matches. |
| 1650 unsigned replaceSize = matchCount * repStrLength; | 1623 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1651 unsigned newSize = m_length - matchCount; | 1624 ++matchCount; |
| 1652 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); | 1625 ++srcSegmentStart; |
| 1653 | 1626 } |
| 1654 newSize += replaceSize; | 1627 |
| 1655 | 1628 // If we have 0 matches then we don't have to do any more work. |
| 1656 // Construct the new data. | 1629 if (!matchCount) |
| 1657 size_t srcSegmentEnd; | 1630 return this; |
| 1658 unsigned srcSegmentLength; | 1631 |
| 1659 srcSegmentStart = 0; | 1632 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max()
/ repStrLength); |
| 1660 unsigned dstOffset = 0; | 1633 |
| 1661 | 1634 unsigned replaceSize = matchCount * repStrLength; |
| 1662 if (is8Bit()) { | 1635 unsigned newSize = m_length - matchCount; |
| 1663 LChar* data; | 1636 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); |
| 1664 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | 1637 |
| 1665 | 1638 newSize += replaceSize; |
| 1666 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | 1639 |
| 1667 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | 1640 // Construct the new data. |
| 1668 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegment
Length * sizeof(LChar)); | 1641 size_t srcSegmentEnd; |
| 1669 dstOffset += srcSegmentLength; | 1642 unsigned srcSegmentLength; |
| 1670 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar)); | 1643 srcSegmentStart = 0; |
| 1671 dstOffset += repStrLength; | 1644 unsigned dstOffset = 0; |
| 1672 srcSegmentStart = srcSegmentEnd + 1; | 1645 |
| 1673 } | 1646 if (is8Bit()) { |
| 1674 | |
| 1675 srcSegmentLength = m_length - srcSegmentStart; | |
| 1676 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLeng
th * sizeof(LChar)); | |
| 1677 | |
| 1678 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
| 1679 | |
| 1680 return newImpl.release(); | |
| 1681 } | |
| 1682 | |
| 1683 UChar* data; | 1647 UChar* data; |
| 1684 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | 1648 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
| 1685 | 1649 |
| 1686 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | 1650 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1687 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | 1651 srcSegmentLength = srcSegmentEnd - srcSegmentStart; |
| 1688 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen
gth * sizeof(UChar)); | 1652 for (unsigned i = 0; i < srcSegmentLength; ++i) |
| 1689 | 1653 data[i + dstOffset] = characters8()[i + srcSegmentStart]; |
| 1690 dstOffset += srcSegmentLength; | 1654 |
| 1691 for (unsigned i = 0; i < repStrLength; ++i) | 1655 dstOffset += srcSegmentLength; |
| 1692 data[i + dstOffset] = replacement[i]; | 1656 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); |
| 1693 | 1657 |
| 1694 dstOffset += repStrLength; | 1658 dstOffset += repStrLength; |
| 1695 srcSegmentStart = srcSegmentEnd + 1; | 1659 srcSegmentStart = srcSegmentEnd + 1; |
| 1696 } | 1660 } |
| 1697 | 1661 |
| 1698 srcSegmentLength = m_length - srcSegmentStart; | 1662 srcSegmentLength = m_length - srcSegmentStart; |
| 1663 for (unsigned i = 0; i < srcSegmentLength; ++i) |
| 1664 data[i + dstOffset] = characters8()[i + srcSegmentStart]; |
| 1665 |
| 1666 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 1667 |
| 1668 return newImpl.release(); |
| 1669 } |
| 1670 |
| 1671 UChar* data; |
| 1672 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
| 1673 |
| 1674 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1675 srcSegmentLength = srcSegmentEnd - srcSegmentStart; |
| 1699 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength
* sizeof(UChar)); | 1676 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength
* sizeof(UChar)); |
| 1700 | 1677 |
| 1701 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | 1678 dstOffset += srcSegmentLength; |
| 1702 | 1679 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); |
| 1703 return newImpl.release(); | 1680 |
| 1704 } | 1681 dstOffset += repStrLength; |
| 1705 | 1682 srcSegmentStart = srcSegmentEnd + 1; |
| 1706 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replaceme
nt, unsigned repStrLength) | 1683 } |
| 1707 { | 1684 |
| 1708 ASSERT(replacement); | 1685 srcSegmentLength = m_length - srcSegmentStart; |
| 1709 | 1686 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength *
sizeof(UChar)); |
| 1710 size_t srcSegmentStart = 0; | 1687 |
| 1711 unsigned matchCount = 0; | 1688 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 1712 | 1689 |
| 1713 // Count the matches. | 1690 return newImpl.release(); |
| 1714 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { | 1691 } |
| 1715 ++matchCount; | 1692 |
| 1716 ++srcSegmentStart; | 1693 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl
acement) { |
| 1717 } | 1694 if (!pattern || !replacement) |
| 1718 | 1695 return this; |
| 1719 // If we have 0 matches then we don't have to do any more work. | 1696 |
| 1720 if (!matchCount) | 1697 unsigned patternLength = pattern->length(); |
| 1721 return this; | 1698 if (!patternLength) |
| 1722 | 1699 return this; |
| 1723 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max(
) / repStrLength); | 1700 |
| 1724 | 1701 unsigned repStrLength = replacement->length(); |
| 1725 unsigned replaceSize = matchCount * repStrLength; | 1702 size_t srcSegmentStart = 0; |
| 1726 unsigned newSize = m_length - matchCount; | 1703 unsigned matchCount = 0; |
| 1727 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); | 1704 |
| 1728 | 1705 // Count the matches. |
| 1729 newSize += replaceSize; | 1706 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1730 | 1707 ++matchCount; |
| 1731 // Construct the new data. | 1708 srcSegmentStart += patternLength; |
| 1732 size_t srcSegmentEnd; | 1709 } |
| 1733 unsigned srcSegmentLength; | 1710 |
| 1734 srcSegmentStart = 0; | 1711 // If we have 0 matches, we don't have to do any more work |
| 1735 unsigned dstOffset = 0; | 1712 if (!matchCount) |
| 1736 | 1713 return this; |
| 1737 if (is8Bit()) { | 1714 |
| 1738 UChar* data; | 1715 unsigned newSize = m_length - matchCount * patternLength; |
| 1739 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | 1716 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max()
/ repStrLength); |
| 1740 | 1717 |
| 1741 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | 1718 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repS
trLength)); |
| 1742 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | 1719 |
| 1743 for (unsigned i = 0; i < srcSegmentLength; ++i) | 1720 newSize += matchCount * repStrLength; |
| 1744 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | 1721 |
| 1745 | 1722 // Construct the new data |
| 1746 dstOffset += srcSegmentLength; | 1723 size_t srcSegmentEnd; |
| 1747 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); | 1724 unsigned srcSegmentLength; |
| 1748 | 1725 srcSegmentStart = 0; |
| 1749 dstOffset += repStrLength; | 1726 unsigned dstOffset = 0; |
| 1750 srcSegmentStart = srcSegmentEnd + 1; | 1727 bool srcIs8Bit = is8Bit(); |
| 1751 } | 1728 bool replacementIs8Bit = replacement->is8Bit(); |
| 1752 | 1729 |
| 1753 srcSegmentLength = m_length - srcSegmentStart; | 1730 // There are 4 cases: |
| 1754 for (unsigned i = 0; i < srcSegmentLength; ++i) | 1731 // 1. This and replacement are both 8 bit. |
| 1755 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | 1732 // 2. This and replacement are both 16 bit. |
| 1756 | 1733 // 3. This is 8 bit and replacement is 16 bit. |
| 1757 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | 1734 // 4. This is 16 bit and replacement is 8 bit. |
| 1758 | 1735 if (srcIs8Bit && replacementIs8Bit) { |
| 1759 return newImpl.release(); | 1736 // Case 1 |
| 1760 } | 1737 LChar* data; |
| 1761 | |
| 1762 UChar* data; | |
| 1763 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 1764 | |
| 1765 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 1766 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 1767 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen
gth * sizeof(UChar)); | |
| 1768 | |
| 1769 dstOffset += srcSegmentLength; | |
| 1770 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); | |
| 1771 | |
| 1772 dstOffset += repStrLength; | |
| 1773 srcSegmentStart = srcSegmentEnd + 1; | |
| 1774 } | |
| 1775 | |
| 1776 srcSegmentLength = m_length - srcSegmentStart; | |
| 1777 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength
* sizeof(UChar)); | |
| 1778 | |
| 1779 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
| 1780 | |
| 1781 return newImpl.release(); | |
| 1782 } | |
| 1783 | |
| 1784 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl
acement) | |
| 1785 { | |
| 1786 if (!pattern || !replacement) | |
| 1787 return this; | |
| 1788 | |
| 1789 unsigned patternLength = pattern->length(); | |
| 1790 if (!patternLength) | |
| 1791 return this; | |
| 1792 | |
| 1793 unsigned repStrLength = replacement->length(); | |
| 1794 size_t srcSegmentStart = 0; | |
| 1795 unsigned matchCount = 0; | |
| 1796 | |
| 1797 // Count the matches. | |
| 1798 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 1799 ++matchCount; | |
| 1800 srcSegmentStart += patternLength; | |
| 1801 } | |
| 1802 | |
| 1803 // If we have 0 matches, we don't have to do any more work | |
| 1804 if (!matchCount) | |
| 1805 return this; | |
| 1806 | |
| 1807 unsigned newSize = m_length - matchCount * patternLength; | |
| 1808 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max(
) / repStrLength); | |
| 1809 | |
| 1810 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * re
pStrLength)); | |
| 1811 | |
| 1812 newSize += matchCount * repStrLength; | |
| 1813 | |
| 1814 | |
| 1815 // Construct the new data | |
| 1816 size_t srcSegmentEnd; | |
| 1817 unsigned srcSegmentLength; | |
| 1818 srcSegmentStart = 0; | |
| 1819 unsigned dstOffset = 0; | |
| 1820 bool srcIs8Bit = is8Bit(); | |
| 1821 bool replacementIs8Bit = replacement->is8Bit(); | |
| 1822 | |
| 1823 // There are 4 cases: | |
| 1824 // 1. This and replacement are both 8 bit. | |
| 1825 // 2. This and replacement are both 16 bit. | |
| 1826 // 3. This is 8 bit and replacement is 16 bit. | |
| 1827 // 4. This is 16 bit and replacement is 8 bit. | |
| 1828 if (srcIs8Bit && replacementIs8Bit) { | |
| 1829 // Case 1 | |
| 1830 LChar* data; | |
| 1831 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 1832 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 1833 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 1834 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegment
Length * sizeof(LChar)); | |
| 1835 dstOffset += srcSegmentLength; | |
| 1836 memcpy(data + dstOffset, replacement->characters8(), repStrLength *
sizeof(LChar)); | |
| 1837 dstOffset += repStrLength; | |
| 1838 srcSegmentStart = srcSegmentEnd + patternLength; | |
| 1839 } | |
| 1840 | |
| 1841 srcSegmentLength = m_length - srcSegmentStart; | |
| 1842 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLeng
th * sizeof(LChar)); | |
| 1843 | |
| 1844 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
| 1845 | |
| 1846 return newImpl.release(); | |
| 1847 } | |
| 1848 | |
| 1849 UChar* data; | |
| 1850 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | 1738 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
| 1851 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | 1739 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1852 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | 1740 srcSegmentLength = srcSegmentEnd - srcSegmentStart; |
| 1853 if (srcIs8Bit) { | 1741 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength
* sizeof(LChar)); |
| 1854 // Case 3. | 1742 dstOffset += srcSegmentLength; |
| 1855 for (unsigned i = 0; i < srcSegmentLength; ++i) | 1743 memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof
(LChar)); |
| 1856 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | 1744 dstOffset += repStrLength; |
| 1857 } else { | 1745 srcSegmentStart = srcSegmentEnd + patternLength; |
| 1858 // Case 2 & 4. | |
| 1859 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmen
tLength * sizeof(UChar)); | |
| 1860 } | |
| 1861 dstOffset += srcSegmentLength; | |
| 1862 if (replacementIs8Bit) { | |
| 1863 // Cases 2 & 3. | |
| 1864 for (unsigned i = 0; i < repStrLength; ++i) | |
| 1865 data[i + dstOffset] = replacement->characters8()[i]; | |
| 1866 } else { | |
| 1867 // Case 4 | |
| 1868 memcpy(data + dstOffset, replacement->characters16(), repStrLength *
sizeof(UChar)); | |
| 1869 } | |
| 1870 dstOffset += repStrLength; | |
| 1871 srcSegmentStart = srcSegmentEnd + patternLength; | |
| 1872 } | 1746 } |
| 1873 | 1747 |
| 1874 srcSegmentLength = m_length - srcSegmentStart; | 1748 srcSegmentLength = m_length - srcSegmentStart; |
| 1749 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength *
sizeof(LChar)); |
| 1750 |
| 1751 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 1752 |
| 1753 return newImpl.release(); |
| 1754 } |
| 1755 |
| 1756 UChar* data; |
| 1757 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
| 1758 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1759 srcSegmentLength = srcSegmentEnd - srcSegmentStart; |
| 1875 if (srcIs8Bit) { | 1760 if (srcIs8Bit) { |
| 1876 // Case 3. | 1761 // Case 3. |
| 1877 for (unsigned i = 0; i < srcSegmentLength; ++i) | 1762 for (unsigned i = 0; i < srcSegmentLength; ++i) |
| 1878 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | 1763 data[i + dstOffset] = characters8()[i + srcSegmentStart]; |
| 1879 } else { | 1764 } else { |
| 1880 // Cases 2 & 4. | 1765 // Case 2 & 4. |
| 1881 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen
gth * sizeof(UChar)); | 1766 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLengt
h * sizeof(UChar)); |
| 1882 } | 1767 } |
| 1883 | 1768 dstOffset += srcSegmentLength; |
| 1884 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | 1769 if (replacementIs8Bit) { |
| 1885 | 1770 // Cases 2 & 3. |
| 1886 return newImpl.release(); | 1771 for (unsigned i = 0; i < repStrLength; ++i) |
| 1887 } | 1772 data[i + dstOffset] = replacement->characters8()[i]; |
| 1888 | 1773 } else { |
| 1889 PassRefPtr<StringImpl> StringImpl::upconvertedString() | 1774 // Case 4 |
| 1890 { | 1775 memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeo
f(UChar)); |
| 1891 if (is8Bit()) | 1776 } |
| 1892 return String::make16BitFrom8BitSource(characters8(), m_length).releaseI
mpl(); | 1777 dstOffset += repStrLength; |
| 1893 return this; | 1778 srcSegmentStart = srcSegmentEnd + patternLength; |
| 1894 } | 1779 } |
| 1895 | 1780 |
| 1896 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl*
b) | 1781 srcSegmentLength = m_length - srcSegmentStart; |
| 1897 { | 1782 if (srcIs8Bit) { |
| 1898 unsigned aLength = a->length(); | 1783 // Case 3. |
| 1899 unsigned bLength = b->length(); | 1784 for (unsigned i = 0; i < srcSegmentLength; ++i) |
| 1900 if (aLength != bLength) | 1785 data[i + dstOffset] = characters8()[i + srcSegmentStart]; |
| 1786 } else { |
| 1787 // Cases 2 & 4. |
| 1788 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength
* sizeof(UChar)); |
| 1789 } |
| 1790 |
| 1791 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 1792 |
| 1793 return newImpl.release(); |
| 1794 } |
| 1795 |
| 1796 PassRefPtr<StringImpl> StringImpl::upconvertedString() { |
| 1797 if (is8Bit()) |
| 1798 return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl(
); |
| 1799 return this; |
| 1800 } |
| 1801 |
| 1802 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl*
b) { |
| 1803 unsigned aLength = a->length(); |
| 1804 unsigned bLength = b->length(); |
| 1805 if (aLength != bLength) |
| 1806 return false; |
| 1807 |
| 1808 if (a->is8Bit()) { |
| 1809 if (b->is8Bit()) |
| 1810 return equal(a->characters8(), b->characters8(), aLength); |
| 1811 |
| 1812 return equal(a->characters8(), b->characters16(), aLength); |
| 1813 } |
| 1814 |
| 1815 if (b->is8Bit()) |
| 1816 return equal(a->characters16(), b->characters8(), aLength); |
| 1817 |
| 1818 return equal(a->characters16(), b->characters16(), aLength); |
| 1819 } |
| 1820 |
| 1821 bool equal(const StringImpl* a, const StringImpl* b) { |
| 1822 if (a == b) |
| 1823 return true; |
| 1824 if (!a || !b) |
| 1825 return false; |
| 1826 if (a->isAtomic() && b->isAtomic()) |
| 1827 return false; |
| 1828 |
| 1829 return stringImplContentEqual(a, b); |
| 1830 } |
| 1831 |
| 1832 template <typename CharType> |
| 1833 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned lengt
h) { |
| 1834 if (!a) |
| 1835 return !b; |
| 1836 if (!b) |
| 1837 return false; |
| 1838 |
| 1839 if (a->length() != length) |
| 1840 return false; |
| 1841 if (a->is8Bit()) |
| 1842 return equal(a->characters8(), b, length); |
| 1843 return equal(a->characters16(), b, length); |
| 1844 } |
| 1845 |
| 1846 bool equal(const StringImpl* a, const LChar* b, unsigned length) { |
| 1847 return equalInternal(a, b, length); |
| 1848 } |
| 1849 |
| 1850 bool equal(const StringImpl* a, const UChar* b, unsigned length) { |
| 1851 return equalInternal(a, b, length); |
| 1852 } |
| 1853 |
| 1854 bool equal(const StringImpl* a, const LChar* b) { |
| 1855 if (!a) |
| 1856 return !b; |
| 1857 if (!b) |
| 1858 return !a; |
| 1859 |
| 1860 unsigned length = a->length(); |
| 1861 |
| 1862 if (a->is8Bit()) { |
| 1863 const LChar* aPtr = a->characters8(); |
| 1864 for (unsigned i = 0; i != length; ++i) { |
| 1865 LChar bc = b[i]; |
| 1866 LChar ac = aPtr[i]; |
| 1867 if (!bc) |
| 1901 return false; | 1868 return false; |
| 1902 | 1869 if (ac != bc) |
| 1903 if (a->is8Bit()) { | |
| 1904 if (b->is8Bit()) | |
| 1905 return equal(a->characters8(), b->characters8(), aLength); | |
| 1906 | |
| 1907 return equal(a->characters8(), b->characters16(), aLength); | |
| 1908 } | |
| 1909 | |
| 1910 if (b->is8Bit()) | |
| 1911 return equal(a->characters16(), b->characters8(), aLength); | |
| 1912 | |
| 1913 return equal(a->characters16(), b->characters16(), aLength); | |
| 1914 } | |
| 1915 | |
| 1916 bool equal(const StringImpl* a, const StringImpl* b) | |
| 1917 { | |
| 1918 if (a == b) | |
| 1919 return true; | |
| 1920 if (!a || !b) | |
| 1921 return false; | 1870 return false; |
| 1922 if (a->isAtomic() && b->isAtomic()) | 1871 } |
| 1872 |
| 1873 return !b[length]; |
| 1874 } |
| 1875 |
| 1876 const UChar* aPtr = a->characters16(); |
| 1877 for (unsigned i = 0; i != length; ++i) { |
| 1878 LChar bc = b[i]; |
| 1879 if (!bc) |
| 1880 return false; |
| 1881 if (aPtr[i] != bc) |
| 1882 return false; |
| 1883 } |
| 1884 |
| 1885 return !b[length]; |
| 1886 } |
| 1887 |
| 1888 bool equalNonNull(const StringImpl* a, const StringImpl* b) { |
| 1889 ASSERT(a && b); |
| 1890 if (a == b) |
| 1891 return true; |
| 1892 |
| 1893 return stringImplContentEqual(a, b); |
| 1894 } |
| 1895 |
| 1896 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b) { |
| 1897 if (a == b) |
| 1898 return true; |
| 1899 if (!a || !b) |
| 1900 return false; |
| 1901 |
| 1902 return CaseFoldingHash::equal(a, b); |
| 1903 } |
| 1904 |
| 1905 bool equalIgnoringCase(const StringImpl* a, const LChar* b) { |
| 1906 if (!a) |
| 1907 return !b; |
| 1908 if (!b) |
| 1909 return !a; |
| 1910 |
| 1911 unsigned length = a->length(); |
| 1912 |
| 1913 // Do a faster loop for the case where all the characters are ASCII. |
| 1914 UChar ored = 0; |
| 1915 bool equal = true; |
| 1916 if (a->is8Bit()) { |
| 1917 const LChar* as = a->characters8(); |
| 1918 for (unsigned i = 0; i != length; ++i) { |
| 1919 LChar bc = b[i]; |
| 1920 if (!bc) |
| 1923 return false; | 1921 return false; |
| 1924 | 1922 UChar ac = as[i]; |
| 1925 return stringImplContentEqual(a, b); | 1923 ored |= ac; |
| 1926 } | 1924 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); |
| 1927 | |
| 1928 template <typename CharType> | |
| 1929 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned lengt
h) | |
| 1930 { | |
| 1931 if (!a) | |
| 1932 return !b; | |
| 1933 if (!b) | |
| 1934 return false; | |
| 1935 | |
| 1936 if (a->length() != length) | |
| 1937 return false; | |
| 1938 if (a->is8Bit()) | |
| 1939 return equal(a->characters8(), b, length); | |
| 1940 return equal(a->characters16(), b, length); | |
| 1941 } | |
| 1942 | |
| 1943 bool equal(const StringImpl* a, const LChar* b, unsigned length) | |
| 1944 { | |
| 1945 return equalInternal(a, b, length); | |
| 1946 } | |
| 1947 | |
| 1948 bool equal(const StringImpl* a, const UChar* b, unsigned length) | |
| 1949 { | |
| 1950 return equalInternal(a, b, length); | |
| 1951 } | |
| 1952 | |
| 1953 bool equal(const StringImpl* a, const LChar* b) | |
| 1954 { | |
| 1955 if (!a) | |
| 1956 return !b; | |
| 1957 if (!b) | |
| 1958 return !a; | |
| 1959 | |
| 1960 unsigned length = a->length(); | |
| 1961 | |
| 1962 if (a->is8Bit()) { | |
| 1963 const LChar* aPtr = a->characters8(); | |
| 1964 for (unsigned i = 0; i != length; ++i) { | |
| 1965 LChar bc = b[i]; | |
| 1966 LChar ac = aPtr[i]; | |
| 1967 if (!bc) | |
| 1968 return false; | |
| 1969 if (ac != bc) | |
| 1970 return false; | |
| 1971 } | |
| 1972 | |
| 1973 return !b[length]; | |
| 1974 } | |
| 1975 | |
| 1976 const UChar* aPtr = a->characters16(); | |
| 1977 for (unsigned i = 0; i != length; ++i) { | |
| 1978 LChar bc = b[i]; | |
| 1979 if (!bc) | |
| 1980 return false; | |
| 1981 if (aPtr[i] != bc) | |
| 1982 return false; | |
| 1983 } | |
| 1984 | |
| 1985 return !b[length]; | |
| 1986 } | |
| 1987 | |
| 1988 bool equalNonNull(const StringImpl* a, const StringImpl* b) | |
| 1989 { | |
| 1990 ASSERT(a && b); | |
| 1991 if (a == b) | |
| 1992 return true; | |
| 1993 | |
| 1994 return stringImplContentEqual(a, b); | |
| 1995 } | |
| 1996 | |
| 1997 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b) | |
| 1998 { | |
| 1999 if (a == b) | |
| 2000 return true; | |
| 2001 if (!a || !b) | |
| 2002 return false; | |
| 2003 | |
| 2004 return CaseFoldingHash::equal(a, b); | |
| 2005 } | |
| 2006 | |
| 2007 bool equalIgnoringCase(const StringImpl* a, const LChar* b) | |
| 2008 { | |
| 2009 if (!a) | |
| 2010 return !b; | |
| 2011 if (!b) | |
| 2012 return !a; | |
| 2013 | |
| 2014 unsigned length = a->length(); | |
| 2015 | |
| 2016 // Do a faster loop for the case where all the characters are ASCII. | |
| 2017 UChar ored = 0; | |
| 2018 bool equal = true; | |
| 2019 if (a->is8Bit()) { | |
| 2020 const LChar* as = a->characters8(); | |
| 2021 for (unsigned i = 0; i != length; ++i) { | |
| 2022 LChar bc = b[i]; | |
| 2023 if (!bc) | |
| 2024 return false; | |
| 2025 UChar ac = as[i]; | |
| 2026 ored |= ac; | |
| 2027 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); | |
| 2028 } | |
| 2029 | |
| 2030 // Do a slower implementation for cases that include non-ASCII character
s. | |
| 2031 if (ored & ~0x7F) { | |
| 2032 equal = true; | |
| 2033 for (unsigned i = 0; i != length; ++i) | |
| 2034 equal = equal && (foldCase(as[i]) == foldCase(b[i])); | |
| 2035 } | |
| 2036 | |
| 2037 return equal && !b[length]; | |
| 2038 } | |
| 2039 | |
| 2040 const UChar* as = a->characters16(); | |
| 2041 for (unsigned i = 0; i != length; ++i) { | |
| 2042 LChar bc = b[i]; | |
| 2043 if (!bc) | |
| 2044 return false; | |
| 2045 UChar ac = as[i]; | |
| 2046 ored |= ac; | |
| 2047 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); | |
| 2048 } | 1925 } |
| 2049 | 1926 |
| 2050 // Do a slower implementation for cases that include non-ASCII characters. | 1927 // Do a slower implementation for cases that include non-ASCII characters. |
| 2051 if (ored & ~0x7F) { | 1928 if (ored & ~0x7F) { |
| 2052 equal = true; | 1929 equal = true; |
| 2053 for (unsigned i = 0; i != length; ++i) { | 1930 for (unsigned i = 0; i != length; ++i) |
| 2054 equal = equal && (foldCase(as[i]) == foldCase(b[i])); | 1931 equal = equal && (foldCase(as[i]) == foldCase(b[i])); |
| 2055 } | |
| 2056 } | 1932 } |
| 2057 | 1933 |
| 2058 return equal && !b[length]; | 1934 return equal && !b[length]; |
| 2059 } | 1935 } |
| 2060 | 1936 |
| 2061 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) | 1937 const UChar* as = a->characters16(); |
| 2062 { | 1938 for (unsigned i = 0; i != length; ++i) { |
| 2063 ASSERT(a && b); | 1939 LChar bc = b[i]; |
| 2064 if (a == b) | 1940 if (!bc) |
| 2065 return true; | 1941 return false; |
| 2066 | 1942 UChar ac = as[i]; |
| 2067 unsigned length = a->length(); | 1943 ored |= ac; |
| 2068 if (length != b->length()) | 1944 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); |
| 2069 return false; | 1945 } |
| 2070 | 1946 |
| 2071 if (a->is8Bit()) { | 1947 // Do a slower implementation for cases that include non-ASCII characters. |
| 2072 if (b->is8Bit()) | 1948 if (ored & ~0x7F) { |
| 2073 return equalIgnoringCase(a->characters8(), b->characters8(), length)
; | 1949 equal = true; |
| 2074 | 1950 for (unsigned i = 0; i != length; ++i) { |
| 2075 return equalIgnoringCase(b->characters16(), a->characters8(), length); | 1951 equal = equal && (foldCase(as[i]) == foldCase(b[i])); |
| 2076 } | 1952 } |
| 2077 | 1953 } |
| 1954 |
| 1955 return equal && !b[length]; |
| 1956 } |
| 1957 |
| 1958 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) { |
| 1959 ASSERT(a && b); |
| 1960 if (a == b) |
| 1961 return true; |
| 1962 |
| 1963 unsigned length = a->length(); |
| 1964 if (length != b->length()) |
| 1965 return false; |
| 1966 |
| 1967 if (a->is8Bit()) { |
| 2078 if (b->is8Bit()) | 1968 if (b->is8Bit()) |
| 2079 return equalIgnoringCase(a->characters16(), b->characters8(), length); | 1969 return equalIgnoringCase(a->characters8(), b->characters8(), length); |
| 2080 | 1970 |
| 2081 return equalIgnoringCase(a->characters16(), b->characters16(), length); | 1971 return equalIgnoringCase(b->characters16(), a->characters8(), length); |
| 2082 } | 1972 } |
| 2083 | 1973 |
| 2084 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) | 1974 if (b->is8Bit()) |
| 2085 { | 1975 return equalIgnoringCase(a->characters16(), b->characters8(), length); |
| 2086 if (!a && b && !b->length()) | 1976 |
| 2087 return true; | 1977 return equalIgnoringCase(a->characters16(), b->characters16(), length); |
| 2088 if (!b && a && !a->length()) | 1978 } |
| 2089 return true; | 1979 |
| 2090 return equal(a, b); | 1980 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) { |
| 2091 } | 1981 if (!a && b && !b->length()) |
| 2092 | 1982 return true; |
| 2093 size_t StringImpl::sizeInBytes() const | 1983 if (!b && a && !a->length()) |
| 2094 { | 1984 return true; |
| 2095 size_t size = length(); | 1985 return equal(a, b); |
| 2096 if (!is8Bit()) | 1986 } |
| 2097 size *= 2; | 1987 |
| 2098 return size + sizeof(*this); | 1988 size_t StringImpl::sizeInBytes() const { |
| 2099 } | 1989 size_t size = length(); |
| 2100 | 1990 if (!is8Bit()) |
| 2101 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) | 1991 size *= 2; |
| 2102 { | 1992 return size + sizeof(*this); |
| 2103 if (!localeIdentifier.isNull()) { | 1993 } |
| 2104 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(l
ocaleIdentifier, "az")) { | 1994 |
| 2105 if (c == 'i') | 1995 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) { |
| 2106 return latinCapitalLetterIWithDotAbove; | 1996 if (!localeIdentifier.isNull()) { |
| 2107 if (c == latinSmallLetterDotlessI) | 1997 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(local
eIdentifier, "az")) { |
| 2108 return 'I'; | 1998 if (c == 'i') |
| 2109 } else if (localeIdMatchesLang(localeIdentifier, "lt")) { | 1999 return latinCapitalLetterIWithDotAbove; |
| 2110 // TODO(rob.buis) implement upper-casing rules for lt | 2000 if (c == latinSmallLetterDotlessI) |
| 2111 // like in StringImpl::upper(locale). | 2001 return 'I'; |
| 2112 } | 2002 } else if (localeIdMatchesLang(localeIdentifier, "lt")) { |
| 2113 } | 2003 // TODO(rob.buis) implement upper-casing rules for lt |
| 2114 | 2004 // like in StringImpl::upper(locale). |
| 2115 return toUpper(c); | 2005 } |
| 2116 } | 2006 } |
| 2117 | 2007 |
| 2118 } // namespace WTF | 2008 return toUpper(c); |
| 2009 } |
| 2010 |
| 2011 } // namespace WTF |
| OLD | NEW |