| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) |
| 3 * (C) 1999 Antti Koivisto (koivisto@kde.org) | 3 * (C) 1999 Antti Koivisto (koivisto@kde.org) |
| 4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) | 4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) |
| 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All r
ights reserved. | 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All r
ights reserved. |
| 6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) | 6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) |
| 7 * | 7 * |
| 8 * This library is free software; you can redistribute it and/or | 8 * This library is free software; you can redistribute it and/or |
| 9 * modify it under the terms of the GNU Library General Public | 9 * modify it under the terms of the GNU Library General Public |
| 10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 48 #include "wtf/ThreadingPrimitives.h" | 48 #include "wtf/ThreadingPrimitives.h" |
| 49 #include <unistd.h> | 49 #include <unistd.h> |
| 50 #endif | 50 #endif |
| 51 | 51 |
| 52 using namespace std; | 52 using namespace std; |
| 53 | 53 |
| 54 namespace WTF { | 54 namespace WTF { |
| 55 | 55 |
| 56 using namespace Unicode; | 56 using namespace Unicode; |
| 57 | 57 |
| 58 static_assert(sizeof(StringImpl) == 3 * sizeof(int), "StringImpl should stay sma
ll"); | 58 static_assert(sizeof(StringImpl) == 3 * sizeof(int), |
| 59 "StringImpl should stay small"); |
| 59 | 60 |
| 60 #ifdef STRING_STATS | 61 #ifdef STRING_STATS |
| 61 | 62 |
| 62 static Mutex& statsMutex() | 63 static Mutex& statsMutex() { |
| 63 { | 64 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); |
| 64 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); | 65 return mutex; |
| 65 return mutex; | 66 } |
| 66 } | 67 |
| 67 | 68 static HashSet<void*>& liveStrings() { |
| 68 static HashSet<void*>& liveStrings() | 69 // Notice that we can't use HashSet<StringImpl*> because then HashSet would de
dup identical strings. |
| 69 { | 70 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ()); |
| 70 // Notice that we can't use HashSet<StringImpl*> because then HashSet would
dedup identical strings. | 71 return strings; |
| 71 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ()); | 72 } |
| 72 return strings; | 73 |
| 73 } | 74 void addStringForStats(StringImpl* string) { |
| 74 | 75 MutexLocker locker(statsMutex()); |
| 75 void addStringForStats(StringImpl* string) | 76 liveStrings().add(string); |
| 76 { | 77 } |
| 77 MutexLocker locker(statsMutex()); | 78 |
| 78 liveStrings().add(string); | 79 void removeStringForStats(StringImpl* string) { |
| 79 } | 80 MutexLocker locker(statsMutex()); |
| 80 | 81 liveStrings().remove(string); |
| 81 void removeStringForStats(StringImpl* string) | 82 } |
| 82 { | 83 |
| 83 MutexLocker locker(statsMutex()); | 84 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) { |
| 84 liveStrings().remove(string); | 85 const unsigned kMaxSnippetLength = 64; |
| 85 } | 86 snippet.clear(); |
| 86 | 87 |
| 87 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) | 88 size_t expectedLength = std::min(string->length(), kMaxSnippetLength); |
| 88 { | 89 if (expectedLength == kMaxSnippetLength) |
| 89 const unsigned kMaxSnippetLength = 64; | 90 expectedLength += 3; // For the "...". |
| 90 snippet.clear(); | 91 ++expectedLength; // For the terminating '\0'. |
| 91 | 92 snippet.reserveCapacity(expectedLength); |
| 92 size_t expectedLength = std::min(string->length(), kMaxSnippetLength); | 93 |
| 93 if (expectedLength == kMaxSnippetLength) | 94 size_t i; |
| 94 expectedLength += 3; // For the "...". | 95 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) { |
| 95 ++expectedLength; // For the terminating '\0'. | 96 UChar c = (*string)[i]; |
| 96 snippet.reserveCapacity(expectedLength); | 97 if (isASCIIPrintable(c)) |
| 97 | 98 snippet.append(c); |
| 98 size_t i; | 99 else |
| 99 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) { | 100 snippet.append('?'); |
| 100 UChar c = (*string)[i]; | 101 } |
| 101 if (isASCIIPrintable(c)) | 102 if (i < string->length()) { |
| 102 snippet.append(c); | 103 snippet.append('.'); |
| 103 else | 104 snippet.append('.'); |
| 104 snippet.append('?'); | 105 snippet.append('.'); |
| 105 } | 106 } |
| 106 if (i < string->length()) { | 107 snippet.append('\0'); |
| 107 snippet.append('.'); | 108 } |
| 108 snippet.append('.'); | 109 |
| 109 snippet.append('.'); | 110 static bool isUnnecessarilyWide(const StringImpl* string) { |
| 110 } | 111 if (string->is8Bit()) |
| 111 snippet.append('\0'); | 112 return false; |
| 112 } | 113 UChar c = 0; |
| 113 | 114 for (unsigned i = 0; i < string->length(); ++i) |
| 114 static bool isUnnecessarilyWide(const StringImpl* string) | 115 c |= (*string)[i] >> 8; |
| 115 { | 116 return !c; |
| 116 if (string->is8Bit()) | |
| 117 return false; | |
| 118 UChar c = 0; | |
| 119 for (unsigned i = 0; i < string->length(); ++i) | |
| 120 c |= (*string)[i] >> 8; | |
| 121 return !c; | |
| 122 } | 117 } |
| 123 | 118 |
| 124 class PerStringStats : public RefCounted<PerStringStats> { | 119 class PerStringStats : public RefCounted<PerStringStats> { |
| 125 public: | 120 public: |
| 126 static PassRefPtr<PerStringStats> create() | 121 static PassRefPtr<PerStringStats> create() { |
| 127 { | 122 return adoptRef(new PerStringStats); |
| 128 return adoptRef(new PerStringStats); | 123 } |
| 129 } | 124 |
| 130 | 125 void add(const StringImpl* string) { |
| 131 void add(const StringImpl* string) | 126 ++m_numberOfCopies; |
| 132 { | 127 if (!m_length) { |
| 133 ++m_numberOfCopies; | 128 m_length = string->length(); |
| 134 if (!m_length) { | 129 fillWithSnippet(string, m_snippet); |
| 135 m_length = string->length(); | 130 } |
| 136 fillWithSnippet(string, m_snippet); | 131 if (string->isAtomic()) |
| 137 } | 132 ++m_numberOfAtomicCopies; |
| 138 if (string->isAtomic()) | 133 if (isUnnecessarilyWide(string)) |
| 139 ++m_numberOfAtomicCopies; | 134 m_unnecessarilyWide = true; |
| 140 if (isUnnecessarilyWide(string)) | 135 } |
| 141 m_unnecessarilyWide = true; | 136 |
| 142 } | 137 size_t totalCharacters() const { return m_numberOfCopies * m_length; } |
| 143 | 138 |
| 144 size_t totalCharacters() const | 139 void print() { |
| 145 { | 140 const char* status = "ok"; |
| 146 return m_numberOfCopies * m_length; | 141 if (m_unnecessarilyWide) |
| 147 } | 142 status = "16"; |
| 148 | 143 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, |
| 149 void print() | 144 m_length, m_snippet.data()); |
| 150 { | 145 } |
| 151 const char* status = "ok"; | 146 |
| 152 if (m_unnecessarilyWide) | 147 bool m_unnecessarilyWide; |
| 153 status = "16"; | 148 unsigned m_numberOfCopies; |
| 154 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status,
m_length, m_snippet.data()); | 149 unsigned m_length; |
| 155 } | 150 unsigned m_numberOfAtomicCopies; |
| 156 | 151 Vector<char> m_snippet; |
| 157 bool m_unnecessarilyWide; | 152 |
| 158 unsigned m_numberOfCopies; | 153 private: |
| 159 unsigned m_length; | 154 PerStringStats() |
| 160 unsigned m_numberOfAtomicCopies; | 155 : m_unnecessarilyWide(false), |
| 161 Vector<char> m_snippet; | 156 m_numberOfCopies(0), |
| 162 | 157 m_length(0), |
| 163 private: | 158 m_numberOfAtomicCopies(0) {} |
| 164 PerStringStats() | |
| 165 : m_unnecessarilyWide(false) | |
| 166 , m_numberOfCopies(0) | |
| 167 , m_length(0) | |
| 168 , m_numberOfAtomicCopies(0) | |
| 169 { | |
| 170 } | |
| 171 }; | 159 }; |
| 172 | 160 |
| 173 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b) | 161 bool operator<(const RefPtr<PerStringStats>& a, |
| 174 { | 162 const RefPtr<PerStringStats>& b) { |
| 175 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide) | 163 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide) |
| 176 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide; | 164 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide; |
| 177 if (a->totalCharacters() != b->totalCharacters()) | 165 if (a->totalCharacters() != b->totalCharacters()) |
| 178 return a->totalCharacters() < b->totalCharacters(); | 166 return a->totalCharacters() < b->totalCharacters(); |
| 179 if (a->m_numberOfCopies != b->m_numberOfCopies) | 167 if (a->m_numberOfCopies != b->m_numberOfCopies) |
| 180 return a->m_numberOfCopies < b->m_numberOfCopies; | 168 return a->m_numberOfCopies < b->m_numberOfCopies; |
| 181 if (a->m_length != b->m_length) | 169 if (a->m_length != b->m_length) |
| 182 return a->m_length < b->m_length; | 170 return a->m_length < b->m_length; |
| 183 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies; | 171 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies; |
| 184 } | 172 } |
| 185 | 173 |
| 186 static void printLiveStringStats(void*) | 174 static void printLiveStringStats(void*) { |
| 187 { | 175 MutexLocker locker(statsMutex()); |
| 188 MutexLocker locker(statsMutex()); | 176 HashSet<void*>& strings = liveStrings(); |
| 189 HashSet<void*>& strings = liveStrings(); | 177 |
| 190 | 178 HashMap<StringImpl*, RefPtr<PerStringStats>> stats; |
| 191 HashMap<StringImpl*, RefPtr<PerStringStats>> stats; | 179 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); |
| 192 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end();
++iter) { | 180 ++iter) { |
| 193 StringImpl* string = static_cast<StringImpl*>(*iter); | 181 StringImpl* string = static_cast<StringImpl*>(*iter); |
| 194 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry = stats.fin
d(string); | 182 HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry = |
| 195 RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringSt
ats>(PerStringStats::create()) : entry->value; | 183 stats.find(string); |
| 196 value->add(string); | 184 RefPtr<PerStringStats> value = |
| 197 stats.set(string, value.release()); | 185 entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) |
| 198 } | 186 : entry->value; |
| 199 | 187 value->add(string); |
| 200 Vector<RefPtr<PerStringStats>> all; | 188 stats.set(string, value.release()); |
| 201 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter = stats.beg
in(); iter != stats.end(); ++iter) | 189 } |
| 202 all.append(iter->value); | 190 |
| 203 | 191 Vector<RefPtr<PerStringStats>> all; |
| 204 std::sort(all.begin(), all.end()); | 192 for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter = |
| 205 std::reverse(all.begin(), all.end()); | 193 stats.begin(); |
| 206 for (size_t i = 0; i < 20 && i < all.size(); ++i) | 194 iter != stats.end(); ++iter) |
| 207 all[i]->print(); | 195 all.append(iter->value); |
| 196 |
| 197 std::sort(all.begin(), all.end()); |
| 198 std::reverse(all.begin(), all.end()); |
| 199 for (size_t i = 0; i < 20 && i < all.size(); ++i) |
| 200 all[i]->print(); |
| 208 } | 201 } |
| 209 | 202 |
| 210 StringStats StringImpl::m_stringStats; | 203 StringStats StringImpl::m_stringStats; |
| 211 | 204 |
| 212 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printString
StatsFrequency; | 205 unsigned StringStats::s_stringRemovesTillPrintStats = |
| 213 | 206 StringStats::s_printStringStatsFrequency; |
| 214 void StringStats::removeString(StringImpl* string) | 207 |
| 215 { | 208 void StringStats::removeString(StringImpl* string) { |
| 216 unsigned length = string->length(); | 209 unsigned length = string->length(); |
| 217 --m_totalNumberStrings; | 210 --m_totalNumberStrings; |
| 218 | 211 |
| 219 if (string->is8Bit()) { | 212 if (string->is8Bit()) { |
| 220 --m_number8BitStrings; | 213 --m_number8BitStrings; |
| 221 m_total8BitData -= length; | 214 m_total8BitData -= length; |
| 222 } else { | 215 } else { |
| 223 --m_number16BitStrings; | 216 --m_number16BitStrings; |
| 224 m_total16BitData -= length; | 217 m_total16BitData -= length; |
| 225 } | 218 } |
| 226 | 219 |
| 227 if (!--s_stringRemovesTillPrintStats) { | 220 if (!--s_stringRemovesTillPrintStats) { |
| 228 s_stringRemovesTillPrintStats = s_printStringStatsFrequency; | 221 s_stringRemovesTillPrintStats = s_printStringStatsFrequency; |
| 229 printStats(); | 222 printStats(); |
| 230 } | 223 } |
| 231 } | 224 } |
| 232 | 225 |
| 233 void StringStats::printStats() | 226 void StringStats::printStats() { |
| 234 { | 227 dataLogF("String stats for process id %d:\n", getpid()); |
| 235 dataLogF("String stats for process id %d:\n", getpid()); | 228 |
| 236 | 229 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData; |
| 237 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitDat
a; | 230 double percent8Bit = |
| 238 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 1
00) / (double)m_totalNumberStrings : 0.0; | 231 m_totalNumberStrings |
| 239 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (
double)m_number8BitStrings : 0.0; | 232 ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings |
| 240 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length
%6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, av
erage8bitLength); | 233 : 0.0; |
| 241 | 234 double average8bitLength = |
| 242 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings *
100) / (double)m_totalNumberStrings : 0.0; | 235 m_number8BitStrings |
| 243 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData
/ (double)m_number16BitStrings : 0.0; | 236 ? (double)m_total8BitData / (double)m_number8BitStrings |
| 244 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length
%6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData
* 2, average16bitLength); | 237 : 0.0; |
| 245 | 238 dataLogF( |
| 246 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters
/ (double)m_totalNumberStrings : 0.0; | 239 "%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length " |
| 247 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2; | 240 "%6.1f\n", |
| 248 dataLogF("%8u Total %12llu chars %12llu bytes avg length %
6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLen
gth); | 241 m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, |
| 249 unsigned long long totalSavedBytes = m_total8BitData; | 242 average8bitLength); |
| 250 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) /
(double)(totalDataBytes + totalSavedBytes) : 0.0; | 243 |
| 251 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes,
percentSavings); | 244 double percent16Bit = |
| 252 | 245 m_totalNumberStrings |
| 253 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl); | 246 ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings |
| 254 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 10
0; | 247 : 0.0; |
| 255 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, o
verheadPercent); | 248 double average16bitLength = |
| 256 | 249 m_number16BitStrings |
| 257 internal::callOnMainThread(&printLiveStringStats, nullptr); | 250 ? (double)m_total16BitData / (double)m_number16BitStrings |
| 251 : 0.0; |
| 252 dataLogF( |
| 253 "%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length " |
| 254 "%6.1f\n", |
| 255 m_number16BitStrings, percent16Bit, m_total16BitData, |
| 256 m_total16BitData * 2, average16bitLength); |
| 257 |
| 258 double averageLength = |
| 259 m_totalNumberStrings |
| 260 ? (double)totalNumberCharacters / (double)m_totalNumberStrings |
| 261 : 0.0; |
| 262 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2; |
| 263 dataLogF( |
| 264 "%8u Total %12llu chars %12llu bytes avg length " |
| 265 "%6.1f\n", |
| 266 m_totalNumberStrings, totalNumberCharacters, totalDataBytes, |
| 267 averageLength); |
| 268 unsigned long long totalSavedBytes = m_total8BitData; |
| 269 double percentSavings = totalSavedBytes |
| 270 ? ((double)totalSavedBytes * 100) / |
| 271 (double)(totalDataBytes + totalSavedBytes) |
| 272 : 0.0; |
| 273 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, |
| 274 percentSavings); |
| 275 |
| 276 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl); |
| 277 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100; |
| 278 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, |
| 279 overheadPercent); |
| 280 |
| 281 internal::callOnMainThread(&printLiveStringStats, nullptr); |
| 258 } | 282 } |
| 259 #endif | 283 #endif |
| 260 | 284 |
| 261 void* StringImpl::operator new(size_t size) | 285 void* StringImpl::operator new(size_t size) { |
| 262 { | 286 ASSERT(size == sizeof(StringImpl)); |
| 263 ASSERT(size == sizeof(StringImpl)); | 287 return Partitions::bufferMalloc(size, "WTF::StringImpl"); |
| 264 return Partitions::bufferMalloc(size, "WTF::StringImpl"); | 288 } |
| 265 } | 289 |
| 266 | 290 void StringImpl::operator delete(void* ptr) { |
| 267 void StringImpl::operator delete(void* ptr) | 291 Partitions::bufferFree(ptr); |
| 268 { | 292 } |
| 269 Partitions::bufferFree(ptr); | 293 |
| 270 } | 294 inline StringImpl::~StringImpl() { |
| 271 | 295 ASSERT(!isStatic()); |
| 272 inline StringImpl::~StringImpl() | 296 |
| 273 { | 297 STRING_STATS_REMOVE_STRING(this); |
| 274 ASSERT(!isStatic()); | 298 |
| 275 | 299 if (isAtomic()) |
| 276 STRING_STATS_REMOVE_STRING(this); | 300 AtomicString::remove(this); |
| 277 | 301 } |
| 278 if (isAtomic()) | 302 |
| 279 AtomicString::remove(this); | 303 void StringImpl::destroyIfNotStatic() { |
| 280 } | 304 if (!isStatic()) |
| 281 | 305 delete this; |
| 282 void StringImpl::destroyIfNotStatic() | 306 } |
| 283 { | 307 |
| 284 if (!isStatic()) | 308 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, |
| 285 delete this; | 309 LChar*& data) { |
| 286 } | 310 if (!length) { |
| 287 | 311 data = 0; |
| 288 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*&
data) | 312 return empty(); |
| 289 { | 313 } |
| 290 if (!length) { | 314 |
| 291 data = 0; | 315 // Allocate a single buffer large enough to contain the StringImpl |
| 292 return empty(); | 316 // struct as well as the data which it contains. This removes one |
| 293 } | 317 // heap allocation from this call. |
| 294 | 318 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc( |
| 295 // Allocate a single buffer large enough to contain the StringImpl | 319 allocationSize<LChar>(length), "WTF::StringImpl")); |
| 296 // struct as well as the data which it contains. This removes one | 320 |
| 297 // heap allocation from this call. | 321 data = reinterpret_cast<LChar*>(string + 1); |
| 298 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(alloc
ationSize<LChar>(length), "WTF::StringImpl")); | 322 return adoptRef(new (string) StringImpl(length, Force8BitConstructor)); |
| 299 | 323 } |
| 300 data = reinterpret_cast<LChar*>(string + 1); | 324 |
| 301 return adoptRef(new (string) StringImpl(length, Force8BitConstructor)); | 325 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, |
| 302 } | 326 UChar*& data) { |
| 303 | 327 if (!length) { |
| 304 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*&
data) | 328 data = 0; |
| 305 { | 329 return empty(); |
| 306 if (!length) { | 330 } |
| 307 data = 0; | 331 |
| 308 return empty(); | 332 // Allocate a single buffer large enough to contain the StringImpl |
| 309 } | 333 // struct as well as the data which it contains. This removes one |
| 310 | 334 // heap allocation from this call. |
| 311 // Allocate a single buffer large enough to contain the StringImpl | 335 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc( |
| 312 // struct as well as the data which it contains. This removes one | 336 allocationSize<UChar>(length), "WTF::StringImpl")); |
| 313 // heap allocation from this call. | 337 |
| 314 StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(alloc
ationSize<UChar>(length), "WTF::StringImpl")); | 338 data = reinterpret_cast<UChar*>(string + 1); |
| 315 | 339 return adoptRef(new (string) StringImpl(length)); |
| 316 data = reinterpret_cast<UChar*>(string + 1); | 340 } |
| 317 return adoptRef(new (string) StringImpl(length)); | 341 |
| 318 } | 342 static StaticStringsTable& staticStrings() { |
| 319 | 343 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ()); |
| 320 static StaticStringsTable& staticStrings() | 344 return staticStrings; |
| 321 { | |
| 322 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ()); | |
| 323 return staticStrings; | |
| 324 } | 345 } |
| 325 | 346 |
| 326 #if ENABLE(ASSERT) | 347 #if ENABLE(ASSERT) |
| 327 static bool s_allowCreationOfStaticStrings = true; | 348 static bool s_allowCreationOfStaticStrings = true; |
| 328 #endif | 349 #endif |
| 329 | 350 |
| 330 const StaticStringsTable& StringImpl::allStaticStrings() | 351 const StaticStringsTable& StringImpl::allStaticStrings() { |
| 331 { | 352 return staticStrings(); |
| 332 return staticStrings(); | 353 } |
| 333 } | 354 |
| 334 | 355 void StringImpl::freezeStaticStrings() { |
| 335 void StringImpl::freezeStaticStrings() | 356 ASSERT(isMainThread()); |
| 336 { | |
| 337 ASSERT(isMainThread()); | |
| 338 | 357 |
| 339 #if ENABLE(ASSERT) | 358 #if ENABLE(ASSERT) |
| 340 s_allowCreationOfStaticStrings = false; | 359 s_allowCreationOfStaticStrings = false; |
| 341 #endif | 360 #endif |
| 342 } | 361 } |
| 343 | 362 |
| 344 unsigned StringImpl::m_highestStaticStringLength = 0; | 363 unsigned StringImpl::m_highestStaticStringLength = 0; |
| 345 | 364 |
| 346 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsign
ed hash) | 365 StringImpl* StringImpl::createStatic(const char* string, |
| 347 { | 366 unsigned length, |
| 348 ASSERT(s_allowCreationOfStaticStrings); | 367 unsigned hash) { |
| 349 ASSERT(string); | 368 ASSERT(s_allowCreationOfStaticStrings); |
| 350 ASSERT(length); | 369 ASSERT(string); |
| 351 | 370 ASSERT(length); |
| 352 StaticStringsTable::const_iterator it = staticStrings().find(hash); | 371 |
| 353 if (it != staticStrings().end()) { | 372 StaticStringsTable::const_iterator it = staticStrings().find(hash); |
| 354 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar))); | 373 if (it != staticStrings().end()) { |
| 355 return it->value; | 374 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar))); |
| 356 } | 375 return it->value; |
| 357 | 376 } |
| 358 // Allocate a single buffer large enough to contain the StringImpl | 377 |
| 359 // struct as well as the data which it contains. This removes one | 378 // Allocate a single buffer large enough to contain the StringImpl |
| 360 // heap allocation from this call. | 379 // struct as well as the data which it contains. This removes one |
| 361 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(Str
ingImpl)) / sizeof(LChar))); | 380 // heap allocation from this call. |
| 362 size_t size = sizeof(StringImpl) + length * sizeof(LChar); | 381 RELEASE_ASSERT(length <= |
| 363 | 382 ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / |
| 364 WTF_INTERNAL_LEAK_SANITIZER_DISABLED_SCOPE; | 383 sizeof(LChar))); |
| 365 StringImpl* impl = static_cast<StringImpl*>(Partitions::bufferMalloc(size, "
WTF::StringImpl")); | 384 size_t size = sizeof(StringImpl) + length * sizeof(LChar); |
| 366 | 385 |
| 367 LChar* data = reinterpret_cast<LChar*>(impl + 1); | 386 WTF_INTERNAL_LEAK_SANITIZER_DISABLED_SCOPE; |
| 368 impl = new (impl) StringImpl(length, hash, StaticString); | 387 StringImpl* impl = static_cast<StringImpl*>( |
| 369 memcpy(data, string, length * sizeof(LChar)); | 388 Partitions::bufferMalloc(size, "WTF::StringImpl")); |
| 389 |
| 390 LChar* data = reinterpret_cast<LChar*>(impl + 1); |
| 391 impl = new (impl) StringImpl(length, hash, StaticString); |
| 392 memcpy(data, string, length * sizeof(LChar)); |
| 370 #if ENABLE(ASSERT) | 393 #if ENABLE(ASSERT) |
| 371 impl->assertHashIsCorrect(); | 394 impl->assertHashIsCorrect(); |
| 372 #endif | 395 #endif |
| 373 | 396 |
| 374 ASSERT(isMainThread()); | 397 ASSERT(isMainThread()); |
| 375 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length); | 398 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length); |
| 376 staticStrings().add(hash, impl); | 399 staticStrings().add(hash, impl); |
| 377 WTF_ANNOTATE_BENIGN_RACE(impl, | 400 WTF_ANNOTATE_BENIGN_RACE(impl, |
| 378 "Benign race on the reference counter of a static string created by Stri
ngImpl::createStatic"); | 401 "Benign race on the reference counter of a static " |
| 379 | 402 "string created by StringImpl::createStatic"); |
| 380 return impl; | 403 |
| 381 } | 404 return impl; |
| 382 | 405 } |
| 383 void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) | 406 |
| 384 { | 407 void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) { |
| 385 ASSERT(s_allowCreationOfStaticStrings); | 408 ASSERT(s_allowCreationOfStaticStrings); |
| 386 staticStrings().reserveCapacityForSize(size); | 409 staticStrings().reserveCapacityForSize(size); |
| 387 } | 410 } |
| 388 | 411 |
| 389 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned leng
th) | 412 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, |
| 390 { | 413 unsigned length) { |
| 391 if (!characters || !length) | 414 if (!characters || !length) |
| 392 return empty(); | 415 return empty(); |
| 393 | 416 |
| 394 UChar* data; | 417 UChar* data; |
| 395 RefPtr<StringImpl> string = createUninitialized(length, data); | 418 RefPtr<StringImpl> string = createUninitialized(length, data); |
| 396 memcpy(data, characters, length * sizeof(UChar)); | 419 memcpy(data, characters, length * sizeof(UChar)); |
| 397 return string.release(); | 420 return string.release(); |
| 398 } | 421 } |
| 399 | 422 |
| 400 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned leng
th) | 423 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, |
| 401 { | 424 unsigned length) { |
| 402 if (!characters || !length) | 425 if (!characters || !length) |
| 403 return empty(); | 426 return empty(); |
| 404 | 427 |
| 405 LChar* data; | 428 LChar* data; |
| 406 RefPtr<StringImpl> string = createUninitialized(length, data); | 429 RefPtr<StringImpl> string = createUninitialized(length, data); |
| 407 memcpy(data, characters, length * sizeof(LChar)); | 430 memcpy(data, characters, length * sizeof(LChar)); |
| 408 return string.release(); | 431 return string.release(); |
| 409 } | 432 } |
| 410 | 433 |
| 411 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters,
unsigned length) | 434 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, |
| 412 { | 435 unsigned length) { |
| 413 if (!characters || !length) | 436 if (!characters || !length) |
| 414 return empty(); | 437 return empty(); |
| 415 | 438 |
| 416 LChar* data; | 439 LChar* data; |
| 417 RefPtr<StringImpl> string = createUninitialized(length, data); | 440 RefPtr<StringImpl> string = createUninitialized(length, data); |
| 418 | 441 |
| 419 for (size_t i = 0; i < length; ++i) { | 442 for (size_t i = 0; i < length; ++i) { |
| 420 if (characters[i] & 0xff00) | 443 if (characters[i] & 0xff00) |
| 421 return create(characters, length); | 444 return create(characters, length); |
| 422 data[i] = static_cast<LChar>(characters[i]); | 445 data[i] = static_cast<LChar>(characters[i]); |
| 423 } | 446 } |
| 424 | 447 |
| 425 return string.release(); | 448 return string.release(); |
| 426 } | 449 } |
| 427 | 450 |
| 428 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) | 451 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) { |
| 429 { | 452 if (!string) |
| 430 if (!string) | 453 return empty(); |
| 431 return empty(); | 454 size_t length = strlen(reinterpret_cast<const char*>(string)); |
| 432 size_t length = strlen(reinterpret_cast<const char*>(string)); | 455 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); |
| 433 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); | 456 return create(string, length); |
| 434 return create(string, length); | 457 } |
| 435 } | 458 |
| 436 | 459 bool StringImpl::containsOnlyWhitespace() { |
| 437 bool StringImpl::containsOnlyWhitespace() | 460 // FIXME: The definition of whitespace here includes a number of characters |
| 438 { | 461 // that are not whitespace from the point of view of LayoutText; I wonder if |
| 439 // FIXME: The definition of whitespace here includes a number of characters | 462 // that's a problem in practice. |
| 440 // that are not whitespace from the point of view of LayoutText; I wonder if | 463 if (is8Bit()) { |
| 441 // that's a problem in practice. | |
| 442 if (is8Bit()) { | |
| 443 for (unsigned i = 0; i < m_length; ++i) { | |
| 444 UChar c = characters8()[i]; | |
| 445 if (!isASCIISpace(c)) | |
| 446 return false; | |
| 447 } | |
| 448 | |
| 449 return true; | |
| 450 } | |
| 451 | |
| 452 for (unsigned i = 0; i < m_length; ++i) { | 464 for (unsigned i = 0; i < m_length; ++i) { |
| 453 UChar c = characters16()[i]; | 465 UChar c = characters8()[i]; |
| 454 if (!isASCIISpace(c)) | 466 if (!isASCIISpace(c)) |
| 455 return false; | 467 return false; |
| 456 } | 468 } |
| 469 |
| 457 return true; | 470 return true; |
| 458 } | 471 } |
| 459 | 472 |
| 460 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) | 473 for (unsigned i = 0; i < m_length; ++i) { |
| 461 { | 474 UChar c = characters16()[i]; |
| 462 if (start >= m_length) | 475 if (!isASCIISpace(c)) |
| 463 return empty(); | 476 return false; |
| 464 unsigned maxLength = m_length - start; | 477 } |
| 465 if (length >= maxLength) { | 478 return true; |
| 466 if (!start) | 479 } |
| 467 return this; | 480 |
| 468 length = maxLength; | 481 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) { |
| 469 } | 482 if (start >= m_length) |
| 470 if (is8Bit()) | 483 return empty(); |
| 471 return create(characters8() + start, length); | 484 unsigned maxLength = m_length - start; |
| 472 | 485 if (length >= maxLength) { |
| 473 return create(characters16() + start, length); | 486 if (!start) |
| 474 } | 487 return this; |
| 475 | 488 length = maxLength; |
| 476 UChar32 StringImpl::characterStartingAt(unsigned i) | 489 } |
| 477 { | 490 if (is8Bit()) |
| 478 if (is8Bit()) | 491 return create(characters8() + start, length); |
| 479 return characters8()[i]; | 492 |
| 480 if (U16_IS_SINGLE(characters16()[i])) | 493 return create(characters16() + start, length); |
| 481 return characters16()[i]; | 494 } |
| 482 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(chara
cters16()[i + 1])) | 495 |
| 483 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]); | 496 UChar32 StringImpl::characterStartingAt(unsigned i) { |
| 484 return 0; | 497 if (is8Bit()) |
| 485 } | 498 return characters8()[i]; |
| 486 | 499 if (U16_IS_SINGLE(characters16()[i])) |
| 487 PassRefPtr<StringImpl> StringImpl::lowerASCII() | 500 return characters16()[i]; |
| 488 { | 501 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && |
| 489 | 502 U16_IS_TRAIL(characters16()[i + 1])) |
| 490 // First scan the string for uppercase and non-ASCII characters: | 503 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]); |
| 491 if (is8Bit()) { | 504 return 0; |
| 492 unsigned firstIndexToBeLowered = m_length; | 505 } |
| 493 for (unsigned i = 0; i < m_length; ++i) { | 506 |
| 494 LChar ch = characters8()[i]; | 507 PassRefPtr<StringImpl> StringImpl::lowerASCII() { |
| 495 if (isASCIIUpper(ch)) { | 508 // First scan the string for uppercase and non-ASCII characters: |
| 496 firstIndexToBeLowered = i; | 509 if (is8Bit()) { |
| 497 break; | 510 unsigned firstIndexToBeLowered = m_length; |
| 498 } | 511 for (unsigned i = 0; i < m_length; ++i) { |
| 499 } | 512 LChar ch = characters8()[i]; |
| 500 | 513 if (isASCIIUpper(ch)) { |
| 501 // Nothing to do if the string is all ASCII with no uppercase. | 514 firstIndexToBeLowered = i; |
| 502 if (firstIndexToBeLowered == m_length) { | 515 break; |
| 503 return this; | 516 } |
| 504 } | 517 } |
| 505 | 518 |
| 506 LChar* data8; | |
| 507 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | |
| 508 memcpy(data8, characters8(), firstIndexToBeLowered); | |
| 509 | |
| 510 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) { | |
| 511 LChar ch = characters8()[i]; | |
| 512 data8[i] = isASCIIUpper(ch) ? toASCIILower(ch) : ch; | |
| 513 } | |
| 514 return newImpl.release(); | |
| 515 } | |
| 516 bool noUpper = true; | |
| 517 UChar ored = 0; | |
| 518 | |
| 519 const UChar* end = characters16() + m_length; | |
| 520 for (const UChar* chp = characters16(); chp != end; ++chp) { | |
| 521 if (isASCIIUpper(*chp)) | |
| 522 noUpper = false; | |
| 523 ored |= *chp; | |
| 524 } | |
| 525 // Nothing to do if the string is all ASCII with no uppercase. | 519 // Nothing to do if the string is all ASCII with no uppercase. |
| 526 if (noUpper && !(ored & ~0x7F)) | 520 if (firstIndexToBeLowered == m_length) { |
| 527 return this; | 521 return this; |
| 528 | 522 } |
| 529 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<unsigned>::m
ax())); | 523 |
| 530 unsigned length = m_length; | 524 LChar* data8; |
| 531 | 525 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); |
| 526 memcpy(data8, characters8(), firstIndexToBeLowered); |
| 527 |
| 528 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) { |
| 529 LChar ch = characters8()[i]; |
| 530 data8[i] = isASCIIUpper(ch) ? toASCIILower(ch) : ch; |
| 531 } |
| 532 return newImpl.release(); |
| 533 } |
| 534 bool noUpper = true; |
| 535 UChar ored = 0; |
| 536 |
| 537 const UChar* end = characters16() + m_length; |
| 538 for (const UChar* chp = characters16(); chp != end; ++chp) { |
| 539 if (isASCIIUpper(*chp)) |
| 540 noUpper = false; |
| 541 ored |= *chp; |
| 542 } |
| 543 // Nothing to do if the string is all ASCII with no uppercase. |
| 544 if (noUpper && !(ored & ~0x7F)) |
| 545 return this; |
| 546 |
| 547 RELEASE_ASSERT(m_length <= |
| 548 static_cast<unsigned>(numeric_limits<unsigned>::max())); |
| 549 unsigned length = m_length; |
| 550 |
| 551 UChar* data16; |
| 552 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); |
| 553 |
| 554 for (unsigned i = 0; i < length; ++i) { |
| 555 UChar c = characters16()[i]; |
| 556 data16[i] = isASCIIUpper(c) ? toASCIILower(c) : c; |
| 557 } |
| 558 return newImpl.release(); |
| 559 } |
| 560 |
| 561 PassRefPtr<StringImpl> StringImpl::lower() { |
| 562 // Note: This is a hot function in the Dromaeo benchmark, specifically the |
| 563 // no-op code path up through the first 'return' statement. |
| 564 |
| 565 // First scan the string for uppercase and non-ASCII characters: |
| 566 if (is8Bit()) { |
| 567 unsigned firstIndexToBeLowered = m_length; |
| 568 for (unsigned i = 0; i < m_length; ++i) { |
| 569 LChar ch = characters8()[i]; |
| 570 if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) { |
| 571 firstIndexToBeLowered = i; |
| 572 break; |
| 573 } |
| 574 } |
| 575 |
| 576 // Nothing to do if the string is all ASCII with no uppercase. |
| 577 if (firstIndexToBeLowered == m_length) |
| 578 return this; |
| 579 |
| 580 LChar* data8; |
| 581 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); |
| 582 memcpy(data8, characters8(), firstIndexToBeLowered); |
| 583 |
| 584 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) { |
| 585 LChar ch = characters8()[i]; |
| 586 data8[i] = UNLIKELY(ch & ~0x7F) ? static_cast<LChar>(Unicode::toLower(ch)) |
| 587 : toASCIILower(ch); |
| 588 } |
| 589 |
| 590 return newImpl.release(); |
| 591 } |
| 592 |
| 593 bool noUpper = true; |
| 594 UChar ored = 0; |
| 595 |
| 596 const UChar* end = characters16() + m_length; |
| 597 for (const UChar* chp = characters16(); chp != end; ++chp) { |
| 598 if (UNLIKELY(isASCIIUpper(*chp))) |
| 599 noUpper = false; |
| 600 ored |= *chp; |
| 601 } |
| 602 // Nothing to do if the string is all ASCII with no uppercase. |
| 603 if (noUpper && !(ored & ~0x7F)) |
| 604 return this; |
| 605 |
| 606 RELEASE_ASSERT(m_length <= |
| 607 static_cast<unsigned>(numeric_limits<int32_t>::max())); |
| 608 int32_t length = m_length; |
| 609 |
| 610 if (!(ored & ~0x7F)) { |
| 532 UChar* data16; | 611 UChar* data16; |
| 533 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | 612 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); |
| 534 | 613 |
| 535 for (unsigned i = 0; i < length; ++i) { | 614 for (int32_t i = 0; i < length; ++i) { |
| 536 UChar c = characters16()[i]; | 615 UChar c = characters16()[i]; |
| 537 data16[i] = isASCIIUpper(c) ? toASCIILower(c) : c; | 616 data16[i] = toASCIILower(c); |
| 538 } | 617 } |
| 539 return newImpl.release(); | 618 return newImpl.release(); |
| 540 } | 619 } |
| 541 | 620 |
| 542 PassRefPtr<StringImpl> StringImpl::lower() | 621 // Do a slower implementation for cases that include non-ASCII characters. |
| 543 { | 622 UChar* data16; |
| 544 // Note: This is a hot function in the Dromaeo benchmark, specifically the | 623 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); |
| 545 // no-op code path up through the first 'return' statement. | 624 |
| 546 | 625 bool error; |
| 547 // First scan the string for uppercase and non-ASCII characters: | 626 int32_t realLength = |
| 627 Unicode::toLower(data16, length, characters16(), m_length, &error); |
| 628 if (!error && realLength == length) |
| 629 return newImpl.release(); |
| 630 |
| 631 newImpl = createUninitialized(realLength, data16); |
| 632 Unicode::toLower(data16, realLength, characters16(), m_length, &error); |
| 633 if (error) |
| 634 return this; |
| 635 return newImpl.release(); |
| 636 } |
| 637 |
| 638 PassRefPtr<StringImpl> StringImpl::upper() { |
| 639 // This function could be optimized for no-op cases the way lower() is, |
| 640 // but in empirical testing, few actual calls to upper() are no-ops, so |
| 641 // it wouldn't be worth the extra time for pre-scanning. |
| 642 |
| 643 RELEASE_ASSERT(m_length <= |
| 644 static_cast<unsigned>(numeric_limits<int32_t>::max())); |
| 645 int32_t length = m_length; |
| 646 |
| 647 if (is8Bit()) { |
| 648 LChar* data8; |
| 649 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); |
| 650 |
| 651 // Do a faster loop for the case where all the characters are ASCII. |
| 652 LChar ored = 0; |
| 653 for (int i = 0; i < length; ++i) { |
| 654 LChar c = characters8()[i]; |
| 655 ored |= c; |
| 656 data8[i] = toASCIIUpper(c); |
| 657 } |
| 658 if (!(ored & ~0x7F)) |
| 659 return newImpl.release(); |
| 660 |
| 661 // Do a slower implementation for cases that include non-ASCII Latin-1 characters. |
| 662 int numberSharpSCharacters = 0; |
| 663 |
| 664 // There are two special cases. |
| 665 // 1. latin-1 characters when converted to upper case are 16 bit characters. |
| 666 // 2. Lower case sharp-S converts to "SS" (two characters) |
| 667 for (int32_t i = 0; i < length; ++i) { |
| 668 LChar c = characters8()[i]; |
| 669 if (UNLIKELY(c == smallLetterSharpSCharacter)) |
| 670 ++numberSharpSCharacters; |
| 671 UChar upper = static_cast<UChar>(Unicode::toUpper(c)); |
| 672 if (UNLIKELY(upper > 0xff)) { |
| 673 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path. |
| 674 goto upconvert; |
| 675 } |
| 676 data8[i] = static_cast<LChar>(upper); |
| 677 } |
| 678 |
| 679 if (!numberSharpSCharacters) |
| 680 return newImpl.release(); |
| 681 |
| 682 // We have numberSharpSCharacters sharp-s characters, but none of the other special characters. |
| 683 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); |
| 684 |
| 685 LChar* dest = data8; |
| 686 |
| 687 for (int32_t i = 0; i < length; ++i) { |
| 688 LChar c = characters8()[i]; |
| 689 if (c == smallLetterSharpSCharacter) { |
| 690 *dest++ = 'S'; |
| 691 *dest++ = 'S'; |
| 692 } else { |
| 693 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); |
| 694 } |
| 695 } |
| 696 |
| 697 return newImpl.release(); |
| 698 } |
| 699 |
| 700 upconvert: |
| 701 RefPtr<StringImpl> upconverted = upconvertedString(); |
| 702 const UChar* source16 = upconverted->characters16(); |
| 703 |
| 704 UChar* data16; |
| 705 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); |
| 706 |
| 707 // Do a faster loop for the case where all the characters are ASCII. |
| 708 UChar ored = 0; |
| 709 for (int i = 0; i < length; ++i) { |
| 710 UChar c = source16[i]; |
| 711 ored |= c; |
| 712 data16[i] = toASCIIUpper(c); |
| 713 } |
| 714 if (!(ored & ~0x7F)) |
| 715 return newImpl.release(); |
| 716 |
| 717 // Do a slower implementation for cases that include non-ASCII characters. |
| 718 bool error; |
| 719 int32_t realLength = |
| 720 Unicode::toUpper(data16, length, source16, m_length, &error); |
| 721 if (!error && realLength == length) |
| 722 return newImpl; |
| 723 newImpl = createUninitialized(realLength, data16); |
| 724 Unicode::toUpper(data16, realLength, source16, m_length, &error); |
| 725 if (error) |
| 726 return this; |
| 727 return newImpl.release(); |
| 728 } |
| 729 |
| 730 static inline bool localeIdMatchesLang(const AtomicString& localeId, |
| 731 const char* lang) { |
| 732 size_t langLength = strlen(lang); |
| 733 RELEASE_ASSERT(langLength >= 2 && langLength <= 3); |
| 734 if (!localeId.impl() || |
| 735 !localeId.impl()->startsWithIgnoringCase(lang, langLength)) |
| 736 return false; |
| 737 if (localeId.impl()->length() == langLength) |
| 738 return true; |
| 739 const UChar maybeDelimiter = (*localeId.impl())[langLength]; |
| 740 return maybeDelimiter == '-' || maybeDelimiter == '_' || |
| 741 maybeDelimiter == '@'; |
| 742 } |
| 743 |
| 744 typedef int32_t (*icuCaseConverter)(UChar*, |
| 745 int32_t, |
| 746 const UChar*, |
| 747 int32_t, |
| 748 const char*, |
| 749 UErrorCode*); |
| 750 |
| 751 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, |
| 752 size_t length, |
| 753 icuCaseConverter converter, |
| 754 const char* locale, |
| 755 StringImpl* originalString) { |
| 756 UChar* data16; |
| 757 size_t targetLength = length; |
| 758 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16); |
| 759 do { |
| 760 UErrorCode status = U_ZERO_ERROR; |
| 761 targetLength = |
| 762 converter(data16, targetLength, source16, length, locale, &status); |
| 763 if (U_SUCCESS(status)) { |
| 764 if (length > 0) |
| 765 return output->substring(0, targetLength); |
| 766 return output.release(); |
| 767 } |
| 768 if (status != U_BUFFER_OVERFLOW_ERROR) |
| 769 return originalString; |
| 770 // Expand the buffer. |
| 771 output = StringImpl::createUninitialized(targetLength, data16); |
| 772 } while (true); |
| 773 } |
| 774 |
| 775 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) { |
| 776 // Use the more-optimized code path most of the time. |
| 777 // Only Turkic (tr and az) languages and Lithuanian requires |
| 778 // locale-specific lowercasing rules. Even though CLDR has el-Lower, |
| 779 // it's identical to the locale-agnostic lowercasing. Context-dependent |
| 780 // handling of Greek capital sigma is built into the common lowercasing |
| 781 // function in ICU. |
| 782 const char* localeForConversion = 0; |
| 783 if (localeIdMatchesLang(localeIdentifier, "tr") || |
| 784 localeIdMatchesLang(localeIdentifier, "az")) |
| 785 localeForConversion = "tr"; |
| 786 else if (localeIdMatchesLang(localeIdentifier, "lt")) |
| 787 localeForConversion = "lt"; |
| 788 else |
| 789 return lower(); |
| 790 |
| 791 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) |
| 792 CRASH(); |
| 793 int length = m_length; |
| 794 |
| 795 RefPtr<StringImpl> upconverted = upconvertedString(); |
| 796 const UChar* source16 = upconverted->characters16(); |
| 797 return caseConvert(source16, length, u_strToLower, localeForConversion, this); |
| 798 } |
| 799 |
| 800 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) { |
| 801 // Use the more-optimized code path most of the time. |
| 802 // Only Turkic (tr and az) languages, Greek and Lithuanian require |
| 803 // locale-specific uppercasing rules. |
| 804 icu::UnicodeString transliteratorId; |
| 805 const char* localeForConversion = 0; |
| 806 if (localeIdMatchesLang(localeIdentifier, "tr") || |
| 807 localeIdMatchesLang(localeIdentifier, "az")) |
| 808 localeForConversion = "tr"; |
| 809 else if (localeIdMatchesLang(localeIdentifier, "el")) |
| 810 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper"); |
| 811 else if (localeIdMatchesLang(localeIdentifier, "lt")) |
| 812 localeForConversion = "lt"; |
| 813 else |
| 814 return upper(); |
| 815 |
| 816 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) |
| 817 CRASH(); |
| 818 int length = m_length; |
| 819 |
| 820 RefPtr<StringImpl> upconverted = upconvertedString(); |
| 821 const UChar* source16 = upconverted->characters16(); |
| 822 |
| 823 if (localeForConversion) |
| 824 return caseConvert(source16, length, u_strToUpper, localeForConversion, |
| 825 this); |
| 826 |
| 827 // TODO(jungshik): Cache transliterator if perf penalty warrants it for Greek. |
| 828 UErrorCode status = U_ZERO_ERROR; |
| 829 OwnPtr<icu::Transliterator> translit = |
| 830 adoptPtr(icu::Transliterator::createInstance(transliteratorId, |
| 831 UTRANS_FORWARD, status)); |
| 832 if (U_FAILURE(status)) |
| 833 return upper(); |
| 834 |
| 835 // target will be copy-on-write. |
| 836 icu::UnicodeString target(false, source16, length); |
| 837 translit->transliterate(target); |
| 838 |
| 839 return create(target.getBuffer(), target.length()); |
| 840 } |
| 841 |
| 842 PassRefPtr<StringImpl> StringImpl::fill(UChar character) { |
| 843 if (!(character & ~0x7F)) { |
| 844 LChar* data; |
| 845 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 846 for (unsigned i = 0; i < m_length; ++i) |
| 847 data[i] = static_cast<LChar>(character); |
| 848 return newImpl.release(); |
| 849 } |
| 850 UChar* data; |
| 851 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 852 for (unsigned i = 0; i < m_length; ++i) |
| 853 data[i] = character; |
| 854 return newImpl.release(); |
| 855 } |
| 856 |
| 857 PassRefPtr<StringImpl> StringImpl::foldCase() { |
| 858 RELEASE_ASSERT(m_length <= |
| 859 static_cast<unsigned>(numeric_limits<int32_t>::max())); |
| 860 int32_t length = m_length; |
| 861 |
| 862 if (is8Bit()) { |
| 863 // Do a faster loop for the case where all the characters are ASCII. |
| 864 LChar* data; |
| 865 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 866 LChar ored = 0; |
| 867 |
| 868 for (int32_t i = 0; i < length; ++i) { |
| 869 LChar c = characters8()[i]; |
| 870 data[i] = toASCIILower(c); |
| 871 ored |= c; |
| 872 } |
| 873 |
| 874 if (!(ored & ~0x7F)) |
| 875 return newImpl.release(); |
| 876 |
| 877 // Do a slower implementation for cases that include non-ASCII Latin-1 characters. |
| 878 for (int32_t i = 0; i < length; ++i) |
| 879 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); |
| 880 |
| 881 return newImpl.release(); |
| 882 } |
| 883 |
| 884 // Do a faster loop for the case where all the characters are ASCII. |
| 885 UChar* data; |
| 886 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 887 UChar ored = 0; |
| 888 for (int32_t i = 0; i < length; ++i) { |
| 889 UChar c = characters16()[i]; |
| 890 ored |= c; |
| 891 data[i] = toASCIILower(c); |
| 892 } |
| 893 if (!(ored & ~0x7F)) |
| 894 return newImpl.release(); |
| 895 |
| 896 // Do a slower implementation for cases that include non-ASCII characters. |
| 897 bool error; |
| 898 int32_t realLength = |
| 899 Unicode::foldCase(data, length, characters16(), m_length, &error); |
| 900 if (!error && realLength == length) |
| 901 return newImpl.release(); |
| 902 newImpl = createUninitialized(realLength, data); |
| 903 Unicode::foldCase(data, realLength, characters16(), m_length, &error); |
| 904 if (error) |
| 905 return this; |
| 906 return newImpl.release(); |
| 907 } |
| 908 |
| 909 template <class UCharPredicate> |
| 910 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters( |
| 911 UCharPredicate predicate) { |
| 912 if (!m_length) |
| 913 return empty(); |
| 914 |
| 915 unsigned start = 0; |
| 916 unsigned end = m_length - 1; |
| 917 |
| 918 // skip white space from start |
| 919 while (start <= end && |
| 920 predicate(is8Bit() ? characters8()[start] : characters16()[start])) |
| 921 ++start; |
| 922 |
| 923 // only white space |
| 924 if (start > end) |
| 925 return empty(); |
| 926 |
| 927 // skip white space from end |
| 928 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end])) |
| 929 --end; |
| 930 |
| 931 if (!start && end == m_length - 1) |
| 932 return this; |
| 933 if (is8Bit()) |
| 934 return create(characters8() + start, end + 1 - start); |
| 935 return create(characters16() + start, end + 1 - start); |
| 936 } |
| 937 |
| 938 class UCharPredicate final { |
| 939 STACK_ALLOCATED(); |
| 940 |
| 941 public: |
| 942 inline UCharPredicate(CharacterMatchFunctionPtr function) |
| 943 : m_function(function) {} |
| 944 |
| 945 inline bool operator()(UChar ch) const { return m_function(ch); } |
| 946 |
| 947 private: |
| 948 const CharacterMatchFunctionPtr m_function; |
| 949 }; |
| 950 |
| 951 class SpaceOrNewlinePredicate final { |
| 952 STACK_ALLOCATED(); |
| 953 |
| 954 public: |
| 955 inline bool operator()(UChar ch) const { return isSpaceOrNewline(ch); } |
| 956 }; |
| 957 |
| 958 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() { |
| 959 return stripMatchedCharacters(SpaceOrNewlinePredicate()); |
| 960 } |
| 961 |
| 962 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace( |
| 963 IsWhiteSpaceFunctionPtr isWhiteSpace) { |
| 964 return stripMatchedCharacters(UCharPredicate(isWhiteSpace)); |
| 965 } |
| 966 |
| 967 template <typename CharType> |
| 968 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters( |
| 969 const CharType* characters, |
| 970 CharacterMatchFunctionPtr findMatch) { |
| 971 const CharType* from = characters; |
| 972 const CharType* fromend = from + m_length; |
| 973 |
| 974 // Assume the common case will not remove any characters |
| 975 while (from != fromend && !findMatch(*from)) |
| 976 ++from; |
| 977 if (from == fromend) |
| 978 return this; |
| 979 |
| 980 StringBuffer<CharType> data(m_length); |
| 981 CharType* to = data.characters(); |
| 982 unsigned outc = from - characters; |
| 983 |
| 984 if (outc) |
| 985 memcpy(to, characters, outc * sizeof(CharType)); |
| 986 |
| 987 while (true) { |
| 988 while (from != fromend && findMatch(*from)) |
| 989 ++from; |
| 990 while (from != fromend && !findMatch(*from)) |
| 991 to[outc++] = *from++; |
| 992 if (from == fromend) |
| 993 break; |
| 994 } |
| 995 |
| 996 data.shrink(outc); |
| 997 |
| 998 return data.release(); |
| 999 } |
| 1000 |
| 1001 PassRefPtr<StringImpl> StringImpl::removeCharacters( |
| 1002 CharacterMatchFunctionPtr findMatch) { |
| 1003 if (is8Bit()) |
| 1004 return removeCharacters(characters8(), findMatch); |
| 1005 return removeCharacters(characters16(), findMatch); |
| 1006 } |
| 1007 |
| 1008 template <typename CharType, class UCharPredicate> |
| 1009 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace( |
| 1010 UCharPredicate predicate, |
| 1011 StripBehavior stripBehavior) { |
| 1012 StringBuffer<CharType> data(m_length); |
| 1013 |
| 1014 const CharType* from = getCharacters<CharType>(); |
| 1015 const CharType* fromend = from + m_length; |
| 1016 int outc = 0; |
| 1017 bool changedToSpace = false; |
| 1018 |
| 1019 CharType* to = data.characters(); |
| 1020 |
| 1021 if (stripBehavior == StripExtraWhiteSpace) { |
| 1022 while (true) { |
| 1023 while (from != fromend && predicate(*from)) { |
| 1024 if (*from != ' ') |
| 1025 changedToSpace = true; |
| 1026 ++from; |
| 1027 } |
| 1028 while (from != fromend && !predicate(*from)) |
| 1029 to[outc++] = *from++; |
| 1030 if (from != fromend) |
| 1031 to[outc++] = ' '; |
| 1032 else |
| 1033 break; |
| 1034 } |
| 1035 |
| 1036 if (outc > 0 && to[outc - 1] == ' ') |
| 1037 --outc; |
| 1038 } else { |
| 1039 for (; from != fromend; ++from) { |
| 1040 if (predicate(*from)) { |
| 1041 if (*from != ' ') |
| 1042 changedToSpace = true; |
| 1043 to[outc++] = ' '; |
| 1044 } else { |
| 1045 to[outc++] = *from; |
| 1046 } |
| 1047 } |
| 1048 } |
| 1049 |
| 1050 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) |
| 1051 return this; |
| 1052 |
| 1053 data.shrink(outc); |
| 1054 |
| 1055 return data.release(); |
| 1056 } |
| 1057 |
| 1058 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace( |
| 1059 StripBehavior stripBehavior) { |
| 1060 if (is8Bit()) |
| 1061 return StringImpl::simplifyMatchedCharactersToSpace<LChar>( |
| 1062 SpaceOrNewlinePredicate(), stripBehavior); |
| 1063 return StringImpl::simplifyMatchedCharactersToSpace<UChar>( |
| 1064 SpaceOrNewlinePredicate(), stripBehavior); |
| 1065 } |
| 1066 |
| 1067 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace( |
| 1068 IsWhiteSpaceFunctionPtr isWhiteSpace, |
| 1069 StripBehavior stripBehavior) { |
| 1070 if (is8Bit()) |
| 1071 return StringImpl::simplifyMatchedCharactersToSpace<LChar>( |
| 1072 UCharPredicate(isWhiteSpace), stripBehavior); |
| 1073 return StringImpl::simplifyMatchedCharactersToSpace<UChar>( |
| 1074 UCharPredicate(isWhiteSpace), stripBehavior); |
| 1075 } |
| 1076 |
| 1077 int StringImpl::toIntStrict(bool* ok, int base) { |
| 1078 if (is8Bit()) |
| 1079 return charactersToIntStrict(characters8(), m_length, ok, base); |
| 1080 return charactersToIntStrict(characters16(), m_length, ok, base); |
| 1081 } |
| 1082 |
| 1083 unsigned StringImpl::toUIntStrict(bool* ok, int base) { |
| 1084 if (is8Bit()) |
| 1085 return charactersToUIntStrict(characters8(), m_length, ok, base); |
| 1086 return charactersToUIntStrict(characters16(), m_length, ok, base); |
| 1087 } |
| 1088 |
| 1089 int64_t StringImpl::toInt64Strict(bool* ok, int base) { |
| 1090 if (is8Bit()) |
| 1091 return charactersToInt64Strict(characters8(), m_length, ok, base); |
| 1092 return charactersToInt64Strict(characters16(), m_length, ok, base); |
| 1093 } |
| 1094 |
| 1095 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) { |
| 1096 if (is8Bit()) |
| 1097 return charactersToUInt64Strict(characters8(), m_length, ok, base); |
| 1098 return charactersToUInt64Strict(characters16(), m_length, ok, base); |
| 1099 } |
| 1100 |
| 1101 int StringImpl::toInt(bool* ok) { |
| 1102 if (is8Bit()) |
| 1103 return charactersToInt(characters8(), m_length, ok); |
| 1104 return charactersToInt(characters16(), m_length, ok); |
| 1105 } |
| 1106 |
| 1107 unsigned StringImpl::toUInt(bool* ok) { |
| 1108 if (is8Bit()) |
| 1109 return charactersToUInt(characters8(), m_length, ok); |
| 1110 return charactersToUInt(characters16(), m_length, ok); |
| 1111 } |
| 1112 |
| 1113 int64_t StringImpl::toInt64(bool* ok) { |
| 1114 if (is8Bit()) |
| 1115 return charactersToInt64(characters8(), m_length, ok); |
| 1116 return charactersToInt64(characters16(), m_length, ok); |
| 1117 } |
| 1118 |
| 1119 uint64_t StringImpl::toUInt64(bool* ok) { |
| 1120 if (is8Bit()) |
| 1121 return charactersToUInt64(characters8(), m_length, ok); |
| 1122 return charactersToUInt64(characters16(), m_length, ok); |
| 1123 } |
| 1124 |
| 1125 double StringImpl::toDouble(bool* ok) { |
| 1126 if (is8Bit()) |
| 1127 return charactersToDouble(characters8(), m_length, ok); |
| 1128 return charactersToDouble(characters16(), m_length, ok); |
| 1129 } |
| 1130 |
| 1131 float StringImpl::toFloat(bool* ok) { |
| 1132 if (is8Bit()) |
| 1133 return charactersToFloat(characters8(), m_length, ok); |
| 1134 return charactersToFloat(characters16(), m_length, ok); |
| 1135 } |
| 1136 |
| 1137 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt |
| 1138 const UChar StringImpl::latin1CaseFoldTable[256] = { |
| 1139 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, |
| 1140 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, |
| 1141 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, |
| 1142 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, |
| 1143 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, |
| 1144 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, |
| 1145 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, |
| 1146 0x003f, 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, |
| 1147 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, |
| 1148 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, |
| 1149 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, |
| 1150 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, |
| 1151 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, |
| 1152 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, |
| 1153 0x007e, 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, |
| 1154 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, |
| 1155 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, |
| 1156 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, |
| 1157 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, |
| 1158 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3, |
| 1159 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, |
| 1160 0x00bd, 0x00be, 0x00bf, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, |
| 1161 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, |
| 1162 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, |
| 1163 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, 0x00e0, |
| 1164 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, |
| 1165 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2, |
| 1166 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, |
| 1167 0x00fc, 0x00fd, 0x00fe, 0x00ff, |
| 1168 }; |
| 1169 |
| 1170 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) { |
| 1171 while (length--) { |
| 1172 if (StringImpl::latin1CaseFoldTable[*a++] != |
| 1173 StringImpl::latin1CaseFoldTable[*b++]) |
| 1174 return false; |
| 1175 } |
| 1176 return true; |
| 1177 } |
| 1178 |
| 1179 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) { |
| 1180 while (length--) { |
| 1181 if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++]) |
| 1182 return false; |
| 1183 } |
| 1184 return true; |
| 1185 } |
| 1186 |
| 1187 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, |
| 1188 unsigned start) { |
| 1189 if (is8Bit()) |
| 1190 return WTF::find(characters8(), m_length, matchFunction, start); |
| 1191 return WTF::find(characters16(), m_length, matchFunction, start); |
| 1192 } |
| 1193 |
| 1194 size_t StringImpl::find(const LChar* matchString, unsigned index) { |
| 1195 // Check for null or empty string to match against |
| 1196 if (!matchString) |
| 1197 return kNotFound; |
| 1198 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)); |
| 1199 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); |
| 1200 unsigned matchLength = matchStringLength; |
| 1201 if (!matchLength) |
| 1202 return min(index, length()); |
| 1203 |
| 1204 // Optimization 1: fast case for strings of length 1. |
| 1205 if (matchLength == 1) |
| 1206 return WTF::find(characters16(), length(), *matchString, index); |
| 1207 |
| 1208 // Check index & matchLength are in range. |
| 1209 if (index > length()) |
| 1210 return kNotFound; |
| 1211 unsigned searchLength = length() - index; |
| 1212 if (matchLength > searchLength) |
| 1213 return kNotFound; |
| 1214 // delta is the number of additional times to test; delta == 0 means test only
once. |
| 1215 unsigned delta = searchLength - matchLength; |
| 1216 |
| 1217 const UChar* searchCharacters = characters16() + index; |
| 1218 |
| 1219 // Optimization 2: keep a running hash of the strings, |
| 1220 // only call equal if the hashes match. |
| 1221 unsigned searchHash = 0; |
| 1222 unsigned matchHash = 0; |
| 1223 for (unsigned i = 0; i < matchLength; ++i) { |
| 1224 searchHash += searchCharacters[i]; |
| 1225 matchHash += matchString[i]; |
| 1226 } |
| 1227 |
| 1228 unsigned i = 0; |
| 1229 // keep looping until we match |
| 1230 while (searchHash != matchHash || |
| 1231 !equal(searchCharacters + i, matchString, matchLength)) { |
| 1232 if (i == delta) |
| 1233 return kNotFound; |
| 1234 searchHash += searchCharacters[i + matchLength]; |
| 1235 searchHash -= searchCharacters[i]; |
| 1236 ++i; |
| 1237 } |
| 1238 return index + i; |
| 1239 } |
| 1240 |
| 1241 template <typename CharType> |
| 1242 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, |
| 1243 const LChar* matchString, |
| 1244 unsigned index, |
| 1245 unsigned searchLength, |
| 1246 unsigned matchLength) { |
| 1247 // delta is the number of additional times to test; delta == 0 means test only
once. |
| 1248 unsigned delta = searchLength - matchLength; |
| 1249 |
| 1250 unsigned i = 0; |
| 1251 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { |
| 1252 if (i == delta) |
| 1253 return kNotFound; |
| 1254 ++i; |
| 1255 } |
| 1256 return index + i; |
| 1257 } |
| 1258 |
| 1259 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) { |
| 1260 // Check for null or empty string to match against |
| 1261 if (!matchString) |
| 1262 return kNotFound; |
| 1263 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)); |
| 1264 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); |
| 1265 unsigned matchLength = matchStringLength; |
| 1266 if (!matchLength) |
| 1267 return min(index, length()); |
| 1268 |
| 1269 // Check index & matchLength are in range. |
| 1270 if (index > length()) |
| 1271 return kNotFound; |
| 1272 unsigned searchLength = length() - index; |
| 1273 if (matchLength > searchLength) |
| 1274 return kNotFound; |
| 1275 |
| 1276 if (is8Bit()) |
| 1277 return findIgnoringCaseInternal(characters8() + index, matchString, index, |
| 1278 searchLength, matchLength); |
| 1279 return findIgnoringCaseInternal(characters16() + index, matchString, index, |
| 1280 searchLength, matchLength); |
| 1281 } |
| 1282 |
| 1283 template <typename SearchCharacterType, typename MatchCharacterType> |
| 1284 ALWAYS_INLINE static size_t findInternal( |
| 1285 const SearchCharacterType* searchCharacters, |
| 1286 const MatchCharacterType* matchCharacters, |
| 1287 unsigned index, |
| 1288 unsigned searchLength, |
| 1289 unsigned matchLength) { |
| 1290 // Optimization: keep a running hash of the strings, |
| 1291 // only call equal() if the hashes match. |
| 1292 |
| 1293 // delta is the number of additional times to test; delta == 0 means test only
once. |
| 1294 unsigned delta = searchLength - matchLength; |
| 1295 |
| 1296 unsigned searchHash = 0; |
| 1297 unsigned matchHash = 0; |
| 1298 |
| 1299 for (unsigned i = 0; i < matchLength; ++i) { |
| 1300 searchHash += searchCharacters[i]; |
| 1301 matchHash += matchCharacters[i]; |
| 1302 } |
| 1303 |
| 1304 unsigned i = 0; |
| 1305 // keep looping until we match |
| 1306 while (searchHash != matchHash || |
| 1307 !equal(searchCharacters + i, matchCharacters, matchLength)) { |
| 1308 if (i == delta) |
| 1309 return kNotFound; |
| 1310 searchHash += searchCharacters[i + matchLength]; |
| 1311 searchHash -= searchCharacters[i]; |
| 1312 ++i; |
| 1313 } |
| 1314 return index + i; |
| 1315 } |
| 1316 |
| 1317 size_t StringImpl::find(StringImpl* matchString) { |
| 1318 // Check for null string to match against |
| 1319 if (UNLIKELY(!matchString)) |
| 1320 return kNotFound; |
| 1321 unsigned matchLength = matchString->length(); |
| 1322 |
| 1323 // Optimization 1: fast case for strings of length 1. |
| 1324 if (matchLength == 1) { |
| 548 if (is8Bit()) { | 1325 if (is8Bit()) { |
| 549 unsigned firstIndexToBeLowered = m_length; | 1326 if (matchString->is8Bit()) |
| 550 for (unsigned i = 0; i < m_length; ++i) { | 1327 return WTF::find(characters8(), length(), |
| 551 LChar ch = characters8()[i]; | 1328 matchString->characters8()[0]); |
| 552 if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) { | 1329 return WTF::find(characters8(), length(), matchString->characters16()[0]); |
| 553 firstIndexToBeLowered = i; | 1330 } |
| 554 break; | 1331 if (matchString->is8Bit()) |
| 555 } | 1332 return WTF::find(characters16(), length(), matchString->characters8()[0]); |
| 556 } | 1333 return WTF::find(characters16(), length(), matchString->characters16()[0]); |
| 557 | 1334 } |
| 558 // Nothing to do if the string is all ASCII with no uppercase. | 1335 |
| 559 if (firstIndexToBeLowered == m_length) | 1336 // Check matchLength is in range. |
| 560 return this; | 1337 if (matchLength > length()) |
| 561 | 1338 return kNotFound; |
| 562 LChar* data8; | 1339 |
| 563 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | 1340 // Check for empty string to match against |
| 564 memcpy(data8, characters8(), firstIndexToBeLowered); | 1341 if (UNLIKELY(!matchLength)) |
| 565 | 1342 return 0; |
| 566 for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) { | 1343 |
| 567 LChar ch = characters8()[i]; | 1344 if (is8Bit()) { |
| 568 data8[i] = UNLIKELY(ch & ~0x7F) | 1345 if (matchString->is8Bit()) |
| 569 ? static_cast<LChar>(Unicode::toLower(ch)) : toASCIILower(ch); | 1346 return findInternal(characters8(), matchString->characters8(), 0, |
| 570 } | 1347 length(), matchLength); |
| 571 | 1348 return findInternal(characters8(), matchString->characters16(), 0, length(), |
| 572 return newImpl.release(); | 1349 matchLength); |
| 573 } | 1350 } |
| 574 | 1351 |
| 575 bool noUpper = true; | 1352 if (matchString->is8Bit()) |
| 576 UChar ored = 0; | 1353 return findInternal(characters16(), matchString->characters8(), 0, length(), |
| 577 | 1354 matchLength); |
| 578 const UChar* end = characters16() + m_length; | 1355 |
| 579 for (const UChar* chp = characters16(); chp != end; ++chp) { | 1356 return findInternal(characters16(), matchString->characters16(), 0, length(), |
| 580 if (UNLIKELY(isASCIIUpper(*chp))) | 1357 matchLength); |
| 581 noUpper = false; | 1358 } |
| 582 ored |= *chp; | 1359 |
| 583 } | 1360 size_t StringImpl::find(StringImpl* matchString, unsigned index) { |
| 584 // Nothing to do if the string is all ASCII with no uppercase. | 1361 // Check for null or empty string to match against |
| 585 if (noUpper && !(ored & ~0x7F)) | 1362 if (UNLIKELY(!matchString)) |
| 586 return this; | 1363 return kNotFound; |
| 587 | 1364 |
| 588 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma
x())); | 1365 unsigned matchLength = matchString->length(); |
| 589 int32_t length = m_length; | 1366 |
| 590 | 1367 // Optimization 1: fast case for strings of length 1. |
| 591 if (!(ored & ~0x7F)) { | 1368 if (matchLength == 1) { |
| 592 UChar* data16; | 1369 if (is8Bit()) |
| 593 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | 1370 return WTF::find(characters8(), length(), (*matchString)[0], index); |
| 594 | 1371 return WTF::find(characters16(), length(), (*matchString)[0], index); |
| 595 for (int32_t i = 0; i < length; ++i) { | 1372 } |
| 596 UChar c = characters16()[i]; | 1373 |
| 597 data16[i] = toASCIILower(c); | 1374 if (UNLIKELY(!matchLength)) |
| 598 } | 1375 return min(index, length()); |
| 599 return newImpl.release(); | 1376 |
| 600 } | 1377 // Check index & matchLength are in range. |
| 601 | 1378 if (index > length()) |
| 602 // Do a slower implementation for cases that include non-ASCII characters. | 1379 return kNotFound; |
| 603 UChar* data16; | 1380 unsigned searchLength = length() - index; |
| 604 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | 1381 if (matchLength > searchLength) |
| 605 | 1382 return kNotFound; |
| 606 bool error; | 1383 |
| 607 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_leng
th, &error); | 1384 if (is8Bit()) { |
| 608 if (!error && realLength == length) | 1385 if (matchString->is8Bit()) |
| 609 return newImpl.release(); | 1386 return findInternal(characters8() + index, matchString->characters8(), |
| 610 | 1387 index, searchLength, matchLength); |
| 611 newImpl = createUninitialized(realLength, data16); | 1388 return findInternal(characters8() + index, matchString->characters16(), |
| 612 Unicode::toLower(data16, realLength, characters16(), m_length, &error); | 1389 index, searchLength, matchLength); |
| 613 if (error) | 1390 } |
| 614 return this; | 1391 |
| 615 return newImpl.release(); | 1392 if (matchString->is8Bit()) |
| 616 } | 1393 return findInternal(characters16() + index, matchString->characters8(), |
| 617 | 1394 index, searchLength, matchLength); |
| 618 PassRefPtr<StringImpl> StringImpl::upper() | 1395 |
| 619 { | 1396 return findInternal(characters16() + index, matchString->characters16(), |
| 620 // This function could be optimized for no-op cases the way lower() is, | 1397 index, searchLength, matchLength); |
| 621 // but in empirical testing, few actual calls to upper() are no-ops, so | 1398 } |
| 622 // it wouldn't be worth the extra time for pre-scanning. | 1399 |
| 623 | 1400 template <typename SearchCharacterType, typename MatchCharacterType> |
| 624 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma
x())); | 1401 ALWAYS_INLINE static size_t findIgnoringCaseInner( |
| 625 int32_t length = m_length; | 1402 const SearchCharacterType* searchCharacters, |
| 626 | 1403 const MatchCharacterType* matchCharacters, |
| 627 if (is8Bit()) { | 1404 unsigned index, |
| 628 LChar* data8; | 1405 unsigned searchLength, |
| 629 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); | 1406 unsigned matchLength) { |
| 630 | 1407 // delta is the number of additional times to test; delta == 0 means test only
once. |
| 631 // Do a faster loop for the case where all the characters are ASCII. | 1408 unsigned delta = searchLength - matchLength; |
| 632 LChar ored = 0; | 1409 |
| 633 for (int i = 0; i < length; ++i) { | 1410 unsigned i = 0; |
| 634 LChar c = characters8()[i]; | 1411 // keep looping until we match |
| 635 ored |= c; | 1412 while ( |
| 636 data8[i] = toASCIIUpper(c); | 1413 !equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) { |
| 637 } | 1414 if (i == delta) |
| 638 if (!(ored & ~0x7F)) | 1415 return kNotFound; |
| 639 return newImpl.release(); | 1416 ++i; |
| 640 | 1417 } |
| 641 // Do a slower implementation for cases that include non-ASCII Latin-1 c
haracters. | 1418 return index + i; |
| 642 int numberSharpSCharacters = 0; | 1419 } |
| 643 | 1420 |
| 644 // There are two special cases. | 1421 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) { |
| 645 // 1. latin-1 characters when converted to upper case are 16 bit charac
ters. | 1422 // Check for null or empty string to match against |
| 646 // 2. Lower case sharp-S converts to "SS" (two characters) | 1423 if (!matchString) |
| 647 for (int32_t i = 0; i < length; ++i) { | 1424 return kNotFound; |
| 648 LChar c = characters8()[i]; | 1425 unsigned matchLength = matchString->length(); |
| 649 if (UNLIKELY(c == smallLetterSharpSCharacter)) | 1426 if (!matchLength) |
| 650 ++numberSharpSCharacters; | 1427 return min(index, length()); |
| 651 UChar upper = static_cast<UChar>(Unicode::toUpper(c)); | 1428 |
| 652 if (UNLIKELY(upper > 0xff)) { | 1429 // Check index & matchLength are in range. |
| 653 // Since this upper-cased character does not fit in an 8-bit str
ing, we need to take the 16-bit path. | 1430 if (index > length()) |
| 654 goto upconvert; | 1431 return kNotFound; |
| 655 } | 1432 unsigned searchLength = length() - index; |
| 656 data8[i] = static_cast<LChar>(upper); | 1433 if (matchLength > searchLength) |
| 657 } | 1434 return kNotFound; |
| 658 | 1435 |
| 659 if (!numberSharpSCharacters) | 1436 if (is8Bit()) { |
| 660 return newImpl.release(); | 1437 if (matchString->is8Bit()) |
| 661 | 1438 return findIgnoringCaseInner(characters8() + index, |
| 662 // We have numberSSCharacters sharp-s characters, but none of the other
special characters. | 1439 matchString->characters8(), index, |
| 663 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); | 1440 searchLength, matchLength); |
| 664 | 1441 return findIgnoringCaseInner(characters8() + index, |
| 665 LChar* dest = data8; | 1442 matchString->characters16(), index, |
| 666 | 1443 searchLength, matchLength); |
| 667 for (int32_t i = 0; i < length; ++i) { | 1444 } |
| 668 LChar c = characters8()[i]; | 1445 |
| 669 if (c == smallLetterSharpSCharacter) { | 1446 if (matchString->is8Bit()) |
| 670 *dest++ = 'S'; | 1447 return findIgnoringCaseInner(characters16() + index, |
| 671 *dest++ = 'S'; | 1448 matchString->characters8(), index, |
| 672 } else { | 1449 searchLength, matchLength); |
| 673 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); | 1450 |
| 674 } | 1451 return findIgnoringCaseInner(characters16() + index, |
| 675 } | 1452 matchString->characters16(), index, searchLength, |
| 676 | 1453 matchLength); |
| 677 return newImpl.release(); | 1454 } |
| 678 } | 1455 |
| 679 | 1456 template <typename SearchCharacterType, typename MatchCharacterType> |
| 680 upconvert: | 1457 ALWAYS_INLINE static size_t findIgnoringASCIICaseInner( |
| 681 RefPtr<StringImpl> upconverted = upconvertedString(); | 1458 const SearchCharacterType* searchCharacters, |
| 682 const UChar* source16 = upconverted->characters16(); | 1459 const MatchCharacterType* matchCharacters, |
| 683 | 1460 unsigned index, |
| 684 UChar* data16; | 1461 unsigned searchLength, |
| 685 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); | 1462 unsigned matchLength) { |
| 686 | 1463 // delta is the number of additional times to test; delta == 0 means test only
once. |
| 687 // Do a faster loop for the case where all the characters are ASCII. | 1464 unsigned delta = searchLength - matchLength; |
| 688 UChar ored = 0; | 1465 |
| 689 for (int i = 0; i < length; ++i) { | 1466 unsigned i = 0; |
| 690 UChar c = source16[i]; | 1467 // keep looping until we match |
| 691 ored |= c; | 1468 while (!equalIgnoringASCIICase(searchCharacters + i, matchCharacters, |
| 692 data16[i] = toASCIIUpper(c); | 1469 matchLength)) { |
| 693 } | 1470 if (i == delta) |
| 694 if (!(ored & ~0x7F)) | 1471 return kNotFound; |
| 695 return newImpl.release(); | 1472 ++i; |
| 696 | 1473 } |
| 697 // Do a slower implementation for cases that include non-ASCII characters. | 1474 return index + i; |
| 698 bool error; | 1475 } |
| 699 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &e
rror); | 1476 |
| 700 if (!error && realLength == length) | 1477 size_t StringImpl::findIgnoringASCIICase(StringImpl* matchString, |
| 701 return newImpl; | 1478 unsigned index) { |
| 702 newImpl = createUninitialized(realLength, data16); | 1479 // Check for null or empty string to match against |
| 703 Unicode::toUpper(data16, realLength, source16, m_length, &error); | 1480 if (!matchString) |
| 704 if (error) | 1481 return kNotFound; |
| 705 return this; | 1482 unsigned matchLength = matchString->length(); |
| 706 return newImpl.release(); | 1483 if (!matchLength) |
| 707 } | 1484 return min(index, length()); |
| 708 | 1485 |
| 709 static inline bool localeIdMatchesLang(const AtomicString& localeId, const char*
lang) | 1486 // Check index & matchLength are in range. |
| 710 { | 1487 if (index > length()) |
| 711 size_t langLength = strlen(lang); | 1488 return kNotFound; |
| 712 RELEASE_ASSERT(langLength >= 2 && langLength <= 3); | 1489 unsigned searchLength = length() - index; |
| 713 if (!localeId.impl() || !localeId.impl()->startsWithIgnoringCase(lang, langL
ength)) | 1490 if (matchLength > searchLength) |
| 714 return false; | 1491 return kNotFound; |
| 715 if (localeId.impl()->length() == langLength) | 1492 |
| 716 return true; | 1493 if (is8Bit()) { |
| 717 const UChar maybeDelimiter = (*localeId.impl())[langLength]; | 1494 const LChar* searchStart = characters8() + index; |
| 718 return maybeDelimiter == '-' || maybeDelimiter == '_' || maybeDelimiter == '
@'; | 1495 if (matchString->is8Bit()) |
| 719 } | 1496 return findIgnoringASCIICaseInner(searchStart, matchString->characters8(), |
| 720 | 1497 index, searchLength, matchLength); |
| 721 typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, cons
t char*, UErrorCode*); | 1498 return findIgnoringASCIICaseInner(searchStart, matchString->characters16(), |
| 722 | 1499 index, searchLength, matchLength); |
| 723 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length,
icuCaseConverter converter, const char* locale, StringImpl* originalString) | 1500 } |
| 724 { | 1501 |
| 725 UChar* data16; | 1502 const UChar* searchStart = characters16() + index; |
| 726 size_t targetLength = length; | 1503 if (matchString->is8Bit()) |
| 727 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16); | 1504 return findIgnoringASCIICaseInner(searchStart, matchString->characters8(), |
| 728 do { | 1505 index, searchLength, matchLength); |
| 729 UErrorCode status = U_ZERO_ERROR; | 1506 return findIgnoringASCIICaseInner(searchStart, matchString->characters16(), |
| 730 targetLength = converter(data16, targetLength, source16, length, locale,
&status); | 1507 index, searchLength, matchLength); |
| 731 if (U_SUCCESS(status)) { | 1508 } |
| 732 if (length > 0) | 1509 |
| 733 return output->substring(0, targetLength); | 1510 size_t StringImpl::findNextLineStart(unsigned index) { |
| 734 return output.release(); | 1511 if (is8Bit()) |
| 735 } | 1512 return WTF::findNextLineStart(characters8(), m_length, index); |
| 736 if (status != U_BUFFER_OVERFLOW_ERROR) | 1513 return WTF::findNextLineStart(characters16(), m_length, index); |
| 737 return originalString; | 1514 } |
| 738 // Expand the buffer. | 1515 |
| 739 output = StringImpl::createUninitialized(targetLength, data16); | 1516 size_t StringImpl::count(LChar c) const { |
| 740 } while (true); | 1517 int count = 0; |
| 741 } | 1518 if (is8Bit()) { |
| 742 | 1519 for (size_t i = 0; i < m_length; ++i) |
| 743 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) | 1520 count += characters8()[i] == c; |
| 744 { | 1521 } else { |
| 745 // Use the more-optimized code path most of the time. | 1522 for (size_t i = 0; i < m_length; ++i) |
| 746 // Only Turkic (tr and az) languages and Lithuanian requires | 1523 count += characters16()[i] == c; |
| 747 // locale-specific lowercasing rules. Even though CLDR has el-Lower, | 1524 } |
| 748 // it's identical to the locale-agnostic lowercasing. Context-dependent | 1525 return count; |
| 749 // handling of Greek capital sigma is built into the common lowercasing | 1526 } |
| 750 // function in ICU. | 1527 |
| 751 const char* localeForConversion = 0; | 1528 size_t StringImpl::reverseFind(UChar c, unsigned index) { |
| 752 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(local
eIdentifier, "az")) | 1529 if (is8Bit()) |
| 753 localeForConversion = "tr"; | 1530 return WTF::reverseFind(characters8(), m_length, c, index); |
| 754 else if (localeIdMatchesLang(localeIdentifier, "lt")) | 1531 return WTF::reverseFind(characters16(), m_length, c, index); |
| 755 localeForConversion = "lt"; | 1532 } |
| 756 else | 1533 |
| 757 return lower(); | 1534 template <typename SearchCharacterType, typename MatchCharacterType> |
| 758 | 1535 ALWAYS_INLINE static size_t reverseFindInner( |
| 759 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) | 1536 const SearchCharacterType* searchCharacters, |
| 760 CRASH(); | 1537 const MatchCharacterType* matchCharacters, |
| 761 int length = m_length; | 1538 unsigned index, |
| 762 | 1539 unsigned length, |
| 763 RefPtr<StringImpl> upconverted = upconvertedString(); | 1540 unsigned matchLength) { |
| 764 const UChar* source16 = upconverted->characters16(); | 1541 // Optimization: keep a running hash of the strings, |
| 765 return caseConvert(source16, length, u_strToLower, localeForConversion, this
); | 1542 // only call equal if the hashes match. |
| 766 } | 1543 |
| 767 | 1544 // delta is the number of additional times to test; delta == 0 means test only
once. |
| 768 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) | 1545 unsigned delta = min(index, length - matchLength); |
| 769 { | 1546 |
| 770 // Use the more-optimized code path most of the time. | 1547 unsigned searchHash = 0; |
| 771 // Only Turkic (tr and az) languages and Greek require locale-specific | 1548 unsigned matchHash = 0; |
| 772 // lowercasing rules. | 1549 for (unsigned i = 0; i < matchLength; ++i) { |
| 773 icu::UnicodeString transliteratorId; | 1550 searchHash += searchCharacters[delta + i]; |
| 774 const char* localeForConversion = 0; | 1551 matchHash += matchCharacters[i]; |
| 775 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(local
eIdentifier, "az")) | 1552 } |
| 776 localeForConversion = "tr"; | 1553 |
| 777 else if (localeIdMatchesLang(localeIdentifier, "el")) | 1554 // keep looping until we match |
| 778 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper"); | 1555 while (searchHash != matchHash || |
| 779 else if (localeIdMatchesLang(localeIdentifier, "lt")) | 1556 !equal(searchCharacters + delta, matchCharacters, matchLength)) { |
| 780 localeForConversion = "lt"; | 1557 if (!delta) |
| 781 else | 1558 return kNotFound; |
| 782 return upper(); | 1559 --delta; |
| 783 | 1560 searchHash -= searchCharacters[delta + matchLength]; |
| 784 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) | 1561 searchHash += searchCharacters[delta]; |
| 785 CRASH(); | 1562 } |
| 786 int length = m_length; | 1563 return delta; |
| 787 | 1564 } |
| 788 RefPtr<StringImpl> upconverted = upconvertedString(); | 1565 |
| 789 const UChar* source16 = upconverted->characters16(); | 1566 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) { |
| 790 | 1567 // Check for null or empty string to match against |
| 791 if (localeForConversion) | 1568 if (!matchString) |
| 792 return caseConvert(source16, length, u_strToUpper, localeForConversion,
this); | 1569 return kNotFound; |
| 793 | 1570 unsigned matchLength = matchString->length(); |
| 794 // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek
. | 1571 unsigned ourLength = length(); |
| 795 UErrorCode status = U_ZERO_ERROR; | 1572 if (!matchLength) |
| 796 OwnPtr<icu::Transliterator> translit = | 1573 return min(index, ourLength); |
| 797 adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FO
RWARD, status)); | 1574 |
| 798 if (U_FAILURE(status)) | 1575 // Optimization 1: fast case for strings of length 1. |
| 799 return upper(); | 1576 if (matchLength == 1) { |
| 800 | 1577 if (is8Bit()) |
| 801 // target will be copy-on-write. | 1578 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], |
| 802 icu::UnicodeString target(false, source16, length); | 1579 index); |
| 803 translit->transliterate(target); | 1580 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], |
| 804 | 1581 index); |
| 805 return create(target.getBuffer(), target.length()); | 1582 } |
| 806 } | 1583 |
| 807 | 1584 // Check index & matchLength are in range. |
| 808 PassRefPtr<StringImpl> StringImpl::fill(UChar character) | 1585 if (matchLength > ourLength) |
| 809 { | 1586 return kNotFound; |
| 810 if (!(character & ~0x7F)) { | 1587 |
| 811 LChar* data; | 1588 if (is8Bit()) { |
| 812 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | 1589 if (matchString->is8Bit()) |
| 813 for (unsigned i = 0; i < m_length; ++i) | 1590 return reverseFindInner(characters8(), matchString->characters8(), index, |
| 814 data[i] = static_cast<LChar>(character); | 1591 ourLength, matchLength); |
| 815 return newImpl.release(); | 1592 return reverseFindInner(characters8(), matchString->characters16(), index, |
| 816 } | 1593 ourLength, matchLength); |
| 1594 } |
| 1595 |
| 1596 if (matchString->is8Bit()) |
| 1597 return reverseFindInner(characters16(), matchString->characters8(), index, |
| 1598 ourLength, matchLength); |
| 1599 |
| 1600 return reverseFindInner(characters16(), matchString->characters16(), index, |
| 1601 ourLength, matchLength); |
| 1602 } |
| 1603 |
| 1604 ALWAYS_INLINE static bool equalSubstring(const StringImpl* stringImpl, |
| 1605 unsigned startOffset, |
| 1606 const LChar* matchString, |
| 1607 unsigned matchLength) { |
| 1608 ASSERT(stringImpl); |
| 1609 ASSERT(matchLength <= stringImpl->length()); |
| 1610 ASSERT(startOffset + matchLength <= stringImpl->length()); |
| 1611 |
| 1612 if (stringImpl->is8Bit()) |
| 1613 return equal(stringImpl->characters8() + startOffset, matchString, |
| 1614 matchLength); |
| 1615 return equal(stringImpl->characters16() + startOffset, matchString, |
| 1616 matchLength); |
| 1617 } |
| 1618 |
| 1619 bool StringImpl::startsWith(UChar character) const { |
| 1620 return m_length && (*this)[0] == character; |
| 1621 } |
| 1622 |
| 1623 bool StringImpl::startsWith(const char* prefixString, |
| 1624 unsigned prefixLength) const { |
| 1625 ASSERT(prefixLength); |
| 1626 if (prefixLength > length()) |
| 1627 return false; |
| 1628 return equalSubstring(this, 0, reinterpret_cast<const LChar*>(prefixString), |
| 1629 prefixLength); |
| 1630 } |
| 1631 |
| 1632 ALWAYS_INLINE static bool equalSubstring(const StringImpl* stringImpl, |
| 1633 unsigned startOffset, |
| 1634 const StringImpl* matchString) { |
| 1635 ASSERT(stringImpl); |
| 1636 ASSERT(matchString); |
| 1637 ASSERT(matchString->length() <= stringImpl->length()); |
| 1638 ASSERT(startOffset + matchString->length() <= stringImpl->length()); |
| 1639 |
| 1640 unsigned matchLength = matchString->length(); |
| 1641 if (matchString->is8Bit()) |
| 1642 return equalSubstring(stringImpl, startOffset, matchString->characters8(), |
| 1643 matchLength); |
| 1644 if (stringImpl->is8Bit()) |
| 1645 return equal(stringImpl->characters8() + startOffset, |
| 1646 matchString->characters16(), matchLength); |
| 1647 return equal(stringImpl->characters16() + startOffset, |
| 1648 matchString->characters16(), matchLength); |
| 1649 } |
| 1650 |
| 1651 bool StringImpl::startsWith(const StringImpl* prefix) const { |
| 1652 ASSERT(prefix); |
| 1653 if (prefix->length() > length()) |
| 1654 return false; |
| 1655 return equalSubstring(this, 0, prefix); |
| 1656 } |
| 1657 |
| 1658 ALWAYS_INLINE static bool equalSubstringIgnoringCase( |
| 1659 const StringImpl* stringImpl, |
| 1660 unsigned startOffset, |
| 1661 const LChar* matchString, |
| 1662 unsigned matchLength) { |
| 1663 ASSERT(stringImpl); |
| 1664 ASSERT(matchLength <= stringImpl->length()); |
| 1665 ASSERT(startOffset + matchLength <= stringImpl->length()); |
| 1666 |
| 1667 if (stringImpl->is8Bit()) |
| 1668 return equalIgnoringCase(stringImpl->characters8() + startOffset, |
| 1669 matchString, matchLength); |
| 1670 return equalIgnoringCase(stringImpl->characters16() + startOffset, |
| 1671 matchString, matchLength); |
| 1672 } |
| 1673 |
| 1674 bool StringImpl::startsWithIgnoringCase(const char* prefixString, |
| 1675 unsigned prefixLength) const { |
| 1676 ASSERT(prefixLength); |
| 1677 if (prefixLength > length()) |
| 1678 return false; |
| 1679 return equalSubstringIgnoringCase( |
| 1680 this, 0, reinterpret_cast<const LChar*>(prefixString), prefixLength); |
| 1681 } |
| 1682 |
| 1683 ALWAYS_INLINE static bool equalSubstringIgnoringCase( |
| 1684 const StringImpl* stringImpl, |
| 1685 unsigned startOffset, |
| 1686 const StringImpl* matchString) { |
| 1687 ASSERT(stringImpl); |
| 1688 ASSERT(matchString); |
| 1689 ASSERT(matchString->length() <= stringImpl->length()); |
| 1690 ASSERT(startOffset + matchString->length() <= stringImpl->length()); |
| 1691 |
| 1692 unsigned matchLength = matchString->length(); |
| 1693 if (matchString->is8Bit()) |
| 1694 return equalSubstringIgnoringCase(stringImpl, startOffset, |
| 1695 matchString->characters8(), matchLength); |
| 1696 if (stringImpl->is8Bit()) |
| 1697 return equalIgnoringCase(stringImpl->characters8() + startOffset, |
| 1698 matchString->characters16(), matchLength); |
| 1699 return equalIgnoringCase(stringImpl->characters16() + startOffset, |
| 1700 matchString->characters16(), matchLength); |
| 1701 } |
| 1702 |
| 1703 bool StringImpl::startsWithIgnoringCase(const StringImpl* prefix) const { |
| 1704 ASSERT(prefix); |
| 1705 if (prefix->length() > length()) |
| 1706 return false; |
| 1707 return equalSubstringIgnoringCase(this, 0, prefix); |
| 1708 } |
| 1709 |
| 1710 ALWAYS_INLINE static bool equalSubstringIgnoringASCIICase( |
| 1711 const StringImpl* stringImpl, |
| 1712 unsigned startOffset, |
| 1713 const LChar* matchString, |
| 1714 unsigned matchLength) { |
| 1715 ASSERT(stringImpl); |
| 1716 ASSERT(matchLength <= stringImpl->length()); |
| 1717 ASSERT(startOffset + matchLength <= stringImpl->length()); |
| 1718 |
| 1719 if (stringImpl->is8Bit()) |
| 1720 return equalIgnoringASCIICase(stringImpl->characters8() + startOffset, |
| 1721 matchString, matchLength); |
| 1722 return equalIgnoringASCIICase(stringImpl->characters16() + startOffset, |
| 1723 matchString, matchLength); |
| 1724 } |
| 1725 |
| 1726 bool StringImpl::startsWithIgnoringASCIICase(const char* prefixString, |
| 1727 unsigned prefixLength) const { |
| 1728 ASSERT(prefixLength); |
| 1729 if (prefixLength > length()) |
| 1730 return false; |
| 1731 return equalSubstringIgnoringASCIICase( |
| 1732 this, 0, reinterpret_cast<const LChar*>(prefixString), prefixLength); |
| 1733 } |
| 1734 |
| 1735 ALWAYS_INLINE static bool equalSubstringIgnoringASCIICase( |
| 1736 const StringImpl* stringImpl, |
| 1737 unsigned startOffset, |
| 1738 const StringImpl* matchString) { |
| 1739 ASSERT(stringImpl); |
| 1740 ASSERT(matchString); |
| 1741 ASSERT(matchString->length() <= stringImpl->length()); |
| 1742 ASSERT(startOffset + matchString->length() <= stringImpl->length()); |
| 1743 |
| 1744 unsigned matchLength = matchString->length(); |
| 1745 if (matchString->is8Bit()) |
| 1746 return equalSubstringIgnoringASCIICase( |
| 1747 stringImpl, startOffset, matchString->characters8(), matchLength); |
| 1748 if (stringImpl->is8Bit()) |
| 1749 return equalIgnoringASCIICase(stringImpl->characters8() + startOffset, |
| 1750 matchString->characters16(), matchLength); |
| 1751 return equalIgnoringASCIICase(stringImpl->characters16() + startOffset, |
| 1752 matchString->characters16(), matchLength); |
| 1753 } |
| 1754 |
| 1755 bool StringImpl::startsWithIgnoringASCIICase(const StringImpl* prefix) const { |
| 1756 ASSERT(prefix); |
| 1757 if (prefix->length() > length()) |
| 1758 return false; |
| 1759 return equalSubstringIgnoringASCIICase(this, 0, prefix); |
| 1760 } |
| 1761 |
| 1762 bool StringImpl::endsWith(UChar character) const { |
| 1763 return m_length && (*this)[m_length - 1] == character; |
| 1764 } |
| 1765 |
| 1766 bool StringImpl::endsWith(const char* suffixString, |
| 1767 unsigned suffixLength) const { |
| 1768 ASSERT(suffixLength); |
| 1769 if (suffixLength > length()) |
| 1770 return false; |
| 1771 return equalSubstring(this, length() - suffixLength, |
| 1772 reinterpret_cast<const LChar*>(suffixString), |
| 1773 suffixLength); |
| 1774 } |
| 1775 |
| 1776 bool StringImpl::endsWith(const StringImpl* suffix) const { |
| 1777 ASSERT(suffix); |
| 1778 unsigned suffixLength = suffix->length(); |
| 1779 if (suffixLength > length()) |
| 1780 return false; |
| 1781 return equalSubstring(this, length() - suffixLength, suffix); |
| 1782 } |
| 1783 |
| 1784 bool StringImpl::endsWithIgnoringCase(const char* suffixString, |
| 1785 unsigned suffixLength) const { |
| 1786 ASSERT(suffixLength); |
| 1787 if (suffixLength > length()) |
| 1788 return false; |
| 1789 return equalSubstringIgnoringCase( |
| 1790 this, length() - suffixLength, |
| 1791 reinterpret_cast<const LChar*>(suffixString), suffixLength); |
| 1792 } |
| 1793 |
| 1794 bool StringImpl::endsWithIgnoringCase(const StringImpl* suffix) const { |
| 1795 ASSERT(suffix); |
| 1796 unsigned suffixLength = suffix->length(); |
| 1797 if (suffixLength > length()) |
| 1798 return false; |
| 1799 return equalSubstringIgnoringCase(this, length() - suffixLength, suffix); |
| 1800 } |
| 1801 |
| 1802 bool StringImpl::endsWithIgnoringASCIICase(const char* suffixString, |
| 1803 unsigned suffixLength) const { |
| 1804 ASSERT(suffixLength); |
| 1805 if (suffixLength > length()) |
| 1806 return false; |
| 1807 return equalSubstringIgnoringASCIICase( |
| 1808 this, length() - suffixLength, |
| 1809 reinterpret_cast<const LChar*>(suffixString), suffixLength); |
| 1810 } |
| 1811 |
| 1812 bool StringImpl::endsWithIgnoringASCIICase(const StringImpl* suffix) const { |
| 1813 ASSERT(suffix); |
| 1814 unsigned suffixLength = suffix->length(); |
| 1815 if (suffixLength > length()) |
| 1816 return false; |
| 1817 return equalSubstringIgnoringASCIICase(this, length() - suffixLength, suffix); |
| 1818 } |
| 1819 |
| 1820 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) { |
| 1821 if (oldC == newC) |
| 1822 return this; |
| 1823 |
| 1824 if (find(oldC) == kNotFound) |
| 1825 return this; |
| 1826 |
| 1827 unsigned i; |
| 1828 if (is8Bit()) { |
| 1829 if (newC <= 0xff) { |
| 1830 LChar* data; |
| 1831 LChar oldChar = static_cast<LChar>(oldC); |
| 1832 LChar newChar = static_cast<LChar>(newC); |
| 1833 |
| 1834 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 1835 |
| 1836 for (i = 0; i != m_length; ++i) { |
| 1837 LChar ch = characters8()[i]; |
| 1838 if (ch == oldChar) |
| 1839 ch = newChar; |
| 1840 data[i] = ch; |
| 1841 } |
| 1842 return newImpl.release(); |
| 1843 } |
| 1844 |
| 1845 // There is the possibility we need to up convert from 8 to 16 bit, |
| 1846 // create a 16 bit string for the result. |
| 817 UChar* data; | 1847 UChar* data; |
| 818 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | 1848 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 819 for (unsigned i = 0; i < m_length; ++i) | 1849 |
| 820 data[i] = character; | 1850 for (i = 0; i != m_length; ++i) { |
| 1851 UChar ch = characters8()[i]; |
| 1852 if (ch == oldC) |
| 1853 ch = newC; |
| 1854 data[i] = ch; |
| 1855 } |
| 1856 |
| 821 return newImpl.release(); | 1857 return newImpl.release(); |
| 822 } | 1858 } |
| 823 | 1859 |
| 824 PassRefPtr<StringImpl> StringImpl::foldCase() | 1860 UChar* data; |
| 825 { | 1861 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
| 826 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::ma
x())); | 1862 |
| 827 int32_t length = m_length; | 1863 for (i = 0; i != m_length; ++i) { |
| 828 | 1864 UChar ch = characters16()[i]; |
| 829 if (is8Bit()) { | 1865 if (ch == oldC) |
| 830 // Do a faster loop for the case where all the characters are ASCII. | 1866 ch = newC; |
| 831 LChar* data; | 1867 data[i] = ch; |
| 832 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data); | 1868 } |
| 833 LChar ored = 0; | 1869 return newImpl.release(); |
| 834 | 1870 } |
| 835 for (int32_t i = 0; i < length; ++i) { | 1871 |
| 836 LChar c = characters8()[i]; | 1872 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, |
| 837 data[i] = toASCIILower(c); | 1873 unsigned lengthToReplace, |
| 838 ored |= c; | 1874 StringImpl* str) { |
| 839 } | 1875 position = min(position, length()); |
| 840 | 1876 lengthToReplace = min(lengthToReplace, length() - position); |
| 841 if (!(ored & ~0x7F)) | 1877 unsigned lengthToInsert = str ? str->length() : 0; |
| 842 return newImpl.release(); | 1878 if (!lengthToReplace && !lengthToInsert) |
| 843 | 1879 return this; |
| 844 // Do a slower implementation for cases that include non-ASCII Latin-1 c
haracters. | 1880 |
| 845 for (int32_t i = 0; i < length; ++i) | 1881 RELEASE_ASSERT((length() - lengthToReplace) < |
| 846 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); | 1882 (numeric_limits<unsigned>::max() - lengthToInsert)); |
| 847 | 1883 |
| 848 return newImpl.release(); | 1884 if (is8Bit() && (!str || str->is8Bit())) { |
| 849 } | 1885 LChar* data; |
| 850 | |
| 851 // Do a faster loop for the case where all the characters are ASCII. | |
| 852 UChar* data; | |
| 853 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 854 UChar ored = 0; | |
| 855 for (int32_t i = 0; i < length; ++i) { | |
| 856 UChar c = characters16()[i]; | |
| 857 ored |= c; | |
| 858 data[i] = toASCIILower(c); | |
| 859 } | |
| 860 if (!(ored & ~0x7F)) | |
| 861 return newImpl.release(); | |
| 862 | |
| 863 // Do a slower implementation for cases that include non-ASCII characters. | |
| 864 bool error; | |
| 865 int32_t realLength = Unicode::foldCase(data, length, characters16(), m_lengt
h, &error); | |
| 866 if (!error && realLength == length) | |
| 867 return newImpl.release(); | |
| 868 newImpl = createUninitialized(realLength, data); | |
| 869 Unicode::foldCase(data, realLength, characters16(), m_length, &error); | |
| 870 if (error) | |
| 871 return this; | |
| 872 return newImpl.release(); | |
| 873 } | |
| 874 | |
| 875 template <class UCharPredicate> | |
| 876 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate
predicate) | |
| 877 { | |
| 878 if (!m_length) | |
| 879 return empty(); | |
| 880 | |
| 881 unsigned start = 0; | |
| 882 unsigned end = m_length - 1; | |
| 883 | |
| 884 // skip white space from start | |
| 885 while (start <= end && predicate(is8Bit() ? characters8()[start] : character
s16()[start])) | |
| 886 ++start; | |
| 887 | |
| 888 // only white space | |
| 889 if (start > end) | |
| 890 return empty(); | |
| 891 | |
| 892 // skip white space from end | |
| 893 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end])
) | |
| 894 --end; | |
| 895 | |
| 896 if (!start && end == m_length - 1) | |
| 897 return this; | |
| 898 if (is8Bit()) | |
| 899 return create(characters8() + start, end + 1 - start); | |
| 900 return create(characters16() + start, end + 1 - start); | |
| 901 } | |
| 902 | |
| 903 class UCharPredicate final { | |
| 904 STACK_ALLOCATED(); | |
| 905 public: | |
| 906 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(functi
on) { } | |
| 907 | |
| 908 inline bool operator()(UChar ch) const | |
| 909 { | |
| 910 return m_function(ch); | |
| 911 } | |
| 912 | |
| 913 private: | |
| 914 const CharacterMatchFunctionPtr m_function; | |
| 915 }; | |
| 916 | |
| 917 class SpaceOrNewlinePredicate final { | |
| 918 STACK_ALLOCATED(); | |
| 919 public: | |
| 920 inline bool operator()(UChar ch) const | |
| 921 { | |
| 922 return isSpaceOrNewline(ch); | |
| 923 } | |
| 924 }; | |
| 925 | |
| 926 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() | |
| 927 { | |
| 928 return stripMatchedCharacters(SpaceOrNewlinePredicate()); | |
| 929 } | |
| 930 | |
| 931 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhi
teSpace) | |
| 932 { | |
| 933 return stripMatchedCharacters(UCharPredicate(isWhiteSpace)); | |
| 934 } | |
| 935 | |
| 936 template <typename CharType> | |
| 937 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType
* characters, CharacterMatchFunctionPtr findMatch) | |
| 938 { | |
| 939 const CharType* from = characters; | |
| 940 const CharType* fromend = from + m_length; | |
| 941 | |
| 942 // Assume the common case will not remove any characters | |
| 943 while (from != fromend && !findMatch(*from)) | |
| 944 ++from; | |
| 945 if (from == fromend) | |
| 946 return this; | |
| 947 | |
| 948 StringBuffer<CharType> data(m_length); | |
| 949 CharType* to = data.characters(); | |
| 950 unsigned outc = from - characters; | |
| 951 | |
| 952 if (outc) | |
| 953 memcpy(to, characters, outc * sizeof(CharType)); | |
| 954 | |
| 955 while (true) { | |
| 956 while (from != fromend && findMatch(*from)) | |
| 957 ++from; | |
| 958 while (from != fromend && !findMatch(*from)) | |
| 959 to[outc++] = *from++; | |
| 960 if (from == fromend) | |
| 961 break; | |
| 962 } | |
| 963 | |
| 964 data.shrink(outc); | |
| 965 | |
| 966 return data.release(); | |
| 967 } | |
| 968 | |
| 969 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr fi
ndMatch) | |
| 970 { | |
| 971 if (is8Bit()) | |
| 972 return removeCharacters(characters8(), findMatch); | |
| 973 return removeCharacters(characters16(), findMatch); | |
| 974 } | |
| 975 | |
| 976 template <typename CharType, class UCharPredicate> | |
| 977 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UChar
Predicate predicate, StripBehavior stripBehavior) | |
| 978 { | |
| 979 StringBuffer<CharType> data(m_length); | |
| 980 | |
| 981 const CharType* from = getCharacters<CharType>(); | |
| 982 const CharType* fromend = from + m_length; | |
| 983 int outc = 0; | |
| 984 bool changedToSpace = false; | |
| 985 | |
| 986 CharType* to = data.characters(); | |
| 987 | |
| 988 if (stripBehavior == StripExtraWhiteSpace) { | |
| 989 while (true) { | |
| 990 while (from != fromend && predicate(*from)) { | |
| 991 if (*from != ' ') | |
| 992 changedToSpace = true; | |
| 993 ++from; | |
| 994 } | |
| 995 while (from != fromend && !predicate(*from)) | |
| 996 to[outc++] = *from++; | |
| 997 if (from != fromend) | |
| 998 to[outc++] = ' '; | |
| 999 else | |
| 1000 break; | |
| 1001 } | |
| 1002 | |
| 1003 if (outc > 0 && to[outc - 1] == ' ') | |
| 1004 --outc; | |
| 1005 } else { | |
| 1006 for (; from != fromend; ++from) { | |
| 1007 if (predicate(*from)) { | |
| 1008 if (*from != ' ') | |
| 1009 changedToSpace = true; | |
| 1010 to[outc++] = ' '; | |
| 1011 } else { | |
| 1012 to[outc++] = *from; | |
| 1013 } | |
| 1014 } | |
| 1015 } | |
| 1016 | |
| 1017 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) | |
| 1018 return this; | |
| 1019 | |
| 1020 data.shrink(outc); | |
| 1021 | |
| 1022 return data.release(); | |
| 1023 } | |
| 1024 | |
| 1025 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavio
r) | |
| 1026 { | |
| 1027 if (is8Bit()) | |
| 1028 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlin
ePredicate(), stripBehavior); | |
| 1029 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePre
dicate(), stripBehavior); | |
| 1030 } | |
| 1031 | |
| 1032 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr is
WhiteSpace, StripBehavior stripBehavior) | |
| 1033 { | |
| 1034 if (is8Bit()) | |
| 1035 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicat
e(isWhiteSpace), stripBehavior); | |
| 1036 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(is
WhiteSpace), stripBehavior); | |
| 1037 } | |
| 1038 | |
| 1039 int StringImpl::toIntStrict(bool* ok, int base) | |
| 1040 { | |
| 1041 if (is8Bit()) | |
| 1042 return charactersToIntStrict(characters8(), m_length, ok, base); | |
| 1043 return charactersToIntStrict(characters16(), m_length, ok, base); | |
| 1044 } | |
| 1045 | |
| 1046 unsigned StringImpl::toUIntStrict(bool* ok, int base) | |
| 1047 { | |
| 1048 if (is8Bit()) | |
| 1049 return charactersToUIntStrict(characters8(), m_length, ok, base); | |
| 1050 return charactersToUIntStrict(characters16(), m_length, ok, base); | |
| 1051 } | |
| 1052 | |
| 1053 int64_t StringImpl::toInt64Strict(bool* ok, int base) | |
| 1054 { | |
| 1055 if (is8Bit()) | |
| 1056 return charactersToInt64Strict(characters8(), m_length, ok, base); | |
| 1057 return charactersToInt64Strict(characters16(), m_length, ok, base); | |
| 1058 } | |
| 1059 | |
| 1060 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) | |
| 1061 { | |
| 1062 if (is8Bit()) | |
| 1063 return charactersToUInt64Strict(characters8(), m_length, ok, base); | |
| 1064 return charactersToUInt64Strict(characters16(), m_length, ok, base); | |
| 1065 } | |
| 1066 | |
| 1067 int StringImpl::toInt(bool* ok) | |
| 1068 { | |
| 1069 if (is8Bit()) | |
| 1070 return charactersToInt(characters8(), m_length, ok); | |
| 1071 return charactersToInt(characters16(), m_length, ok); | |
| 1072 } | |
| 1073 | |
| 1074 unsigned StringImpl::toUInt(bool* ok) | |
| 1075 { | |
| 1076 if (is8Bit()) | |
| 1077 return charactersToUInt(characters8(), m_length, ok); | |
| 1078 return charactersToUInt(characters16(), m_length, ok); | |
| 1079 } | |
| 1080 | |
| 1081 int64_t StringImpl::toInt64(bool* ok) | |
| 1082 { | |
| 1083 if (is8Bit()) | |
| 1084 return charactersToInt64(characters8(), m_length, ok); | |
| 1085 return charactersToInt64(characters16(), m_length, ok); | |
| 1086 } | |
| 1087 | |
| 1088 uint64_t StringImpl::toUInt64(bool* ok) | |
| 1089 { | |
| 1090 if (is8Bit()) | |
| 1091 return charactersToUInt64(characters8(), m_length, ok); | |
| 1092 return charactersToUInt64(characters16(), m_length, ok); | |
| 1093 } | |
| 1094 | |
| 1095 double StringImpl::toDouble(bool* ok) | |
| 1096 { | |
| 1097 if (is8Bit()) | |
| 1098 return charactersToDouble(characters8(), m_length, ok); | |
| 1099 return charactersToDouble(characters16(), m_length, ok); | |
| 1100 } | |
| 1101 | |
| 1102 float StringImpl::toFloat(bool* ok) | |
| 1103 { | |
| 1104 if (is8Bit()) | |
| 1105 return charactersToFloat(characters8(), m_length, ok); | |
| 1106 return charactersToFloat(characters16(), m_length, ok); | |
| 1107 } | |
| 1108 | |
| 1109 // Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt | |
| 1110 const UChar StringImpl::latin1CaseFoldTable[256] = { | |
| 1111 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x00
09, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, | |
| 1112 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x00
19, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, | |
| 1113 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x00
29, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, | |
| 1114 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x00
39, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, | |
| 1115 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x00
69, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, | |
| 1116 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x00
79, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, | |
| 1117 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x00
69, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, | |
| 1118 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x00
79, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, | |
| 1119 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x00
89, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, | |
| 1120 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x00
99, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, | |
| 1121 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00
a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, | |
| 1122 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00
b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, | |
| 1123 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00
e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, | |
| 1124 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7, 0x00f8, 0x00
f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, | |
| 1125 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00
e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, | |
| 1126 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00
f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, | |
| 1127 }; | |
| 1128 | |
| 1129 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) | |
| 1130 { | |
| 1131 while (length--) { | |
| 1132 if (StringImpl::latin1CaseFoldTable[*a++] != StringImpl::latin1CaseFoldT
able[*b++]) | |
| 1133 return false; | |
| 1134 } | |
| 1135 return true; | |
| 1136 } | |
| 1137 | |
| 1138 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) | |
| 1139 { | |
| 1140 while (length--) { | |
| 1141 if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++]) | |
| 1142 return false; | |
| 1143 } | |
| 1144 return true; | |
| 1145 } | |
| 1146 | |
| 1147 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) | |
| 1148 { | |
| 1149 if (is8Bit()) | |
| 1150 return WTF::find(characters8(), m_length, matchFunction, start); | |
| 1151 return WTF::find(characters16(), m_length, matchFunction, start); | |
| 1152 } | |
| 1153 | |
| 1154 size_t StringImpl::find(const LChar* matchString, unsigned index) | |
| 1155 { | |
| 1156 // Check for null or empty string to match against | |
| 1157 if (!matchString) | |
| 1158 return kNotFound; | |
| 1159 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)
); | |
| 1160 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); | |
| 1161 unsigned matchLength = matchStringLength; | |
| 1162 if (!matchLength) | |
| 1163 return min(index, length()); | |
| 1164 | |
| 1165 // Optimization 1: fast case for strings of length 1. | |
| 1166 if (matchLength == 1) | |
| 1167 return WTF::find(characters16(), length(), *matchString, index); | |
| 1168 | |
| 1169 // Check index & matchLength are in range. | |
| 1170 if (index > length()) | |
| 1171 return kNotFound; | |
| 1172 unsigned searchLength = length() - index; | |
| 1173 if (matchLength > searchLength) | |
| 1174 return kNotFound; | |
| 1175 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 1176 unsigned delta = searchLength - matchLength; | |
| 1177 | |
| 1178 const UChar* searchCharacters = characters16() + index; | |
| 1179 | |
| 1180 // Optimization 2: keep a running hash of the strings, | |
| 1181 // only call equal if the hashes match. | |
| 1182 unsigned searchHash = 0; | |
| 1183 unsigned matchHash = 0; | |
| 1184 for (unsigned i = 0; i < matchLength; ++i) { | |
| 1185 searchHash += searchCharacters[i]; | |
| 1186 matchHash += matchString[i]; | |
| 1187 } | |
| 1188 | |
| 1189 unsigned i = 0; | |
| 1190 // keep looping until we match | |
| 1191 while (searchHash != matchHash || !equal(searchCharacters + i, matchString,
matchLength)) { | |
| 1192 if (i == delta) | |
| 1193 return kNotFound; | |
| 1194 searchHash += searchCharacters[i + matchLength]; | |
| 1195 searchHash -= searchCharacters[i]; | |
| 1196 ++i; | |
| 1197 } | |
| 1198 return index + i; | |
| 1199 } | |
| 1200 | |
| 1201 template<typename CharType> | |
| 1202 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters,
const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchL
ength) | |
| 1203 { | |
| 1204 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 1205 unsigned delta = searchLength - matchLength; | |
| 1206 | |
| 1207 unsigned i = 0; | |
| 1208 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { | |
| 1209 if (i == delta) | |
| 1210 return kNotFound; | |
| 1211 ++i; | |
| 1212 } | |
| 1213 return index + i; | |
| 1214 } | |
| 1215 | |
| 1216 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) | |
| 1217 { | |
| 1218 // Check for null or empty string to match against | |
| 1219 if (!matchString) | |
| 1220 return kNotFound; | |
| 1221 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)
); | |
| 1222 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); | |
| 1223 unsigned matchLength = matchStringLength; | |
| 1224 if (!matchLength) | |
| 1225 return min(index, length()); | |
| 1226 | |
| 1227 // Check index & matchLength are in range. | |
| 1228 if (index > length()) | |
| 1229 return kNotFound; | |
| 1230 unsigned searchLength = length() - index; | |
| 1231 if (matchLength > searchLength) | |
| 1232 return kNotFound; | |
| 1233 | |
| 1234 if (is8Bit()) | |
| 1235 return findIgnoringCaseInternal(characters8() + index, matchString, inde
x, searchLength, matchLength); | |
| 1236 return findIgnoringCaseInternal(characters16() + index, matchString, index,
searchLength, matchLength); | |
| 1237 } | |
| 1238 | |
| 1239 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1240 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharac
ters, const MatchCharacterType* matchCharacters, unsigned index, unsigned search
Length, unsigned matchLength) | |
| 1241 { | |
| 1242 // Optimization: keep a running hash of the strings, | |
| 1243 // only call equal() if the hashes match. | |
| 1244 | |
| 1245 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 1246 unsigned delta = searchLength - matchLength; | |
| 1247 | |
| 1248 unsigned searchHash = 0; | |
| 1249 unsigned matchHash = 0; | |
| 1250 | |
| 1251 for (unsigned i = 0; i < matchLength; ++i) { | |
| 1252 searchHash += searchCharacters[i]; | |
| 1253 matchHash += matchCharacters[i]; | |
| 1254 } | |
| 1255 | |
| 1256 unsigned i = 0; | |
| 1257 // keep looping until we match | |
| 1258 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacte
rs, matchLength)) { | |
| 1259 if (i == delta) | |
| 1260 return kNotFound; | |
| 1261 searchHash += searchCharacters[i + matchLength]; | |
| 1262 searchHash -= searchCharacters[i]; | |
| 1263 ++i; | |
| 1264 } | |
| 1265 return index + i; | |
| 1266 } | |
| 1267 | |
| 1268 size_t StringImpl::find(StringImpl* matchString) | |
| 1269 { | |
| 1270 // Check for null string to match against | |
| 1271 if (UNLIKELY(!matchString)) | |
| 1272 return kNotFound; | |
| 1273 unsigned matchLength = matchString->length(); | |
| 1274 | |
| 1275 // Optimization 1: fast case for strings of length 1. | |
| 1276 if (matchLength == 1) { | |
| 1277 if (is8Bit()) { | |
| 1278 if (matchString->is8Bit()) | |
| 1279 return WTF::find(characters8(), length(), matchString->character
s8()[0]); | |
| 1280 return WTF::find(characters8(), length(), matchString->characters16(
)[0]); | |
| 1281 } | |
| 1282 if (matchString->is8Bit()) | |
| 1283 return WTF::find(characters16(), length(), matchString->characters8(
)[0]); | |
| 1284 return WTF::find(characters16(), length(), matchString->characters16()[0
]); | |
| 1285 } | |
| 1286 | |
| 1287 // Check matchLength is in range. | |
| 1288 if (matchLength > length()) | |
| 1289 return kNotFound; | |
| 1290 | |
| 1291 // Check for empty string to match against | |
| 1292 if (UNLIKELY(!matchLength)) | |
| 1293 return 0; | |
| 1294 | |
| 1295 if (is8Bit()) { | |
| 1296 if (matchString->is8Bit()) | |
| 1297 return findInternal(characters8(), matchString->characters8(), 0, le
ngth(), matchLength); | |
| 1298 return findInternal(characters8(), matchString->characters16(), 0, lengt
h(), matchLength); | |
| 1299 } | |
| 1300 | |
| 1301 if (matchString->is8Bit()) | |
| 1302 return findInternal(characters16(), matchString->characters8(), 0, lengt
h(), matchLength); | |
| 1303 | |
| 1304 return findInternal(characters16(), matchString->characters16(), 0, length()
, matchLength); | |
| 1305 } | |
| 1306 | |
| 1307 size_t StringImpl::find(StringImpl* matchString, unsigned index) | |
| 1308 { | |
| 1309 // Check for null or empty string to match against | |
| 1310 if (UNLIKELY(!matchString)) | |
| 1311 return kNotFound; | |
| 1312 | |
| 1313 unsigned matchLength = matchString->length(); | |
| 1314 | |
| 1315 // Optimization 1: fast case for strings of length 1. | |
| 1316 if (matchLength == 1) { | |
| 1317 if (is8Bit()) | |
| 1318 return WTF::find(characters8(), length(), (*matchString)[0], index); | |
| 1319 return WTF::find(characters16(), length(), (*matchString)[0], index); | |
| 1320 } | |
| 1321 | |
| 1322 if (UNLIKELY(!matchLength)) | |
| 1323 return min(index, length()); | |
| 1324 | |
| 1325 // Check index & matchLength are in range. | |
| 1326 if (index > length()) | |
| 1327 return kNotFound; | |
| 1328 unsigned searchLength = length() - index; | |
| 1329 if (matchLength > searchLength) | |
| 1330 return kNotFound; | |
| 1331 | |
| 1332 if (is8Bit()) { | |
| 1333 if (matchString->is8Bit()) | |
| 1334 return findInternal(characters8() + index, matchString->characters8(
), index, searchLength, matchLength); | |
| 1335 return findInternal(characters8() + index, matchString->characters16(),
index, searchLength, matchLength); | |
| 1336 } | |
| 1337 | |
| 1338 if (matchString->is8Bit()) | |
| 1339 return findInternal(characters16() + index, matchString->characters8(),
index, searchLength, matchLength); | |
| 1340 | |
| 1341 return findInternal(characters16() + index, matchString->characters16(), ind
ex, searchLength, matchLength); | |
| 1342 } | |
| 1343 | |
| 1344 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1345 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* sea
rchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsign
ed searchLength, unsigned matchLength) | |
| 1346 { | |
| 1347 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 1348 unsigned delta = searchLength - matchLength; | |
| 1349 | |
| 1350 unsigned i = 0; | |
| 1351 // keep looping until we match | |
| 1352 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength
)) { | |
| 1353 if (i == delta) | |
| 1354 return kNotFound; | |
| 1355 ++i; | |
| 1356 } | |
| 1357 return index + i; | |
| 1358 } | |
| 1359 | |
| 1360 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) | |
| 1361 { | |
| 1362 // Check for null or empty string to match against | |
| 1363 if (!matchString) | |
| 1364 return kNotFound; | |
| 1365 unsigned matchLength = matchString->length(); | |
| 1366 if (!matchLength) | |
| 1367 return min(index, length()); | |
| 1368 | |
| 1369 // Check index & matchLength are in range. | |
| 1370 if (index > length()) | |
| 1371 return kNotFound; | |
| 1372 unsigned searchLength = length() - index; | |
| 1373 if (matchLength > searchLength) | |
| 1374 return kNotFound; | |
| 1375 | |
| 1376 if (is8Bit()) { | |
| 1377 if (matchString->is8Bit()) | |
| 1378 return findIgnoringCaseInner(characters8() + index, matchString->cha
racters8(), index, searchLength, matchLength); | |
| 1379 return findIgnoringCaseInner(characters8() + index, matchString->charact
ers16(), index, searchLength, matchLength); | |
| 1380 } | |
| 1381 | |
| 1382 if (matchString->is8Bit()) | |
| 1383 return findIgnoringCaseInner(characters16() + index, matchString->charac
ters8(), index, searchLength, matchLength); | |
| 1384 | |
| 1385 return findIgnoringCaseInner(characters16() + index, matchString->characters
16(), index, searchLength, matchLength); | |
| 1386 } | |
| 1387 | |
| 1388 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1389 ALWAYS_INLINE static size_t findIgnoringASCIICaseInner(const SearchCharacterType
* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, u
nsigned searchLength, unsigned matchLength) | |
| 1390 { | |
| 1391 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 1392 unsigned delta = searchLength - matchLength; | |
| 1393 | |
| 1394 unsigned i = 0; | |
| 1395 // keep looping until we match | |
| 1396 while (!equalIgnoringASCIICase(searchCharacters + i, matchCharacters, matchL
ength)) { | |
| 1397 if (i == delta) | |
| 1398 return kNotFound; | |
| 1399 ++i; | |
| 1400 } | |
| 1401 return index + i; | |
| 1402 } | |
| 1403 | |
| 1404 size_t StringImpl::findIgnoringASCIICase(StringImpl* matchString, unsigned index
) | |
| 1405 { | |
| 1406 // Check for null or empty string to match against | |
| 1407 if (!matchString) | |
| 1408 return kNotFound; | |
| 1409 unsigned matchLength = matchString->length(); | |
| 1410 if (!matchLength) | |
| 1411 return min(index, length()); | |
| 1412 | |
| 1413 // Check index & matchLength are in range. | |
| 1414 if (index > length()) | |
| 1415 return kNotFound; | |
| 1416 unsigned searchLength = length() - index; | |
| 1417 if (matchLength > searchLength) | |
| 1418 return kNotFound; | |
| 1419 | |
| 1420 if (is8Bit()) { | |
| 1421 const LChar* searchStart = characters8() + index; | |
| 1422 if (matchString->is8Bit()) | |
| 1423 return findIgnoringASCIICaseInner(searchStart, matchString->characte
rs8(), index, searchLength, matchLength); | |
| 1424 return findIgnoringASCIICaseInner(searchStart, matchString->characters16
(), index, searchLength, matchLength); | |
| 1425 } | |
| 1426 | |
| 1427 const UChar* searchStart = characters16() + index; | |
| 1428 if (matchString->is8Bit()) | |
| 1429 return findIgnoringASCIICaseInner(searchStart, matchString->characters8(
), index, searchLength, matchLength); | |
| 1430 return findIgnoringASCIICaseInner(searchStart, matchString->characters16(),
index, searchLength, matchLength); | |
| 1431 } | |
| 1432 | |
| 1433 size_t StringImpl::findNextLineStart(unsigned index) | |
| 1434 { | |
| 1435 if (is8Bit()) | |
| 1436 return WTF::findNextLineStart(characters8(), m_length, index); | |
| 1437 return WTF::findNextLineStart(characters16(), m_length, index); | |
| 1438 } | |
| 1439 | |
| 1440 size_t StringImpl::count(LChar c) const | |
| 1441 { | |
| 1442 int count = 0; | |
| 1443 if (is8Bit()) { | |
| 1444 for (size_t i = 0; i < m_length; ++i) | |
| 1445 count += characters8()[i] == c; | |
| 1446 } else { | |
| 1447 for (size_t i = 0; i < m_length; ++i) | |
| 1448 count += characters16()[i] == c; | |
| 1449 } | |
| 1450 return count; | |
| 1451 } | |
| 1452 | |
| 1453 size_t StringImpl::reverseFind(UChar c, unsigned index) | |
| 1454 { | |
| 1455 if (is8Bit()) | |
| 1456 return WTF::reverseFind(characters8(), m_length, c, index); | |
| 1457 return WTF::reverseFind(characters16(), m_length, c, index); | |
| 1458 } | |
| 1459 | |
| 1460 template <typename SearchCharacterType, typename MatchCharacterType> | |
| 1461 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCh
aracters, const MatchCharacterType* matchCharacters, unsigned index, unsigned le
ngth, unsigned matchLength) | |
| 1462 { | |
| 1463 // Optimization: keep a running hash of the strings, | |
| 1464 // only call equal if the hashes match. | |
| 1465 | |
| 1466 // delta is the number of additional times to test; delta == 0 means test on
ly once. | |
| 1467 unsigned delta = min(index, length - matchLength); | |
| 1468 | |
| 1469 unsigned searchHash = 0; | |
| 1470 unsigned matchHash = 0; | |
| 1471 for (unsigned i = 0; i < matchLength; ++i) { | |
| 1472 searchHash += searchCharacters[delta + i]; | |
| 1473 matchHash += matchCharacters[i]; | |
| 1474 } | |
| 1475 | |
| 1476 // keep looping until we match | |
| 1477 while (searchHash != matchHash || !equal(searchCharacters + delta, matchChar
acters, matchLength)) { | |
| 1478 if (!delta) | |
| 1479 return kNotFound; | |
| 1480 --delta; | |
| 1481 searchHash -= searchCharacters[delta + matchLength]; | |
| 1482 searchHash += searchCharacters[delta]; | |
| 1483 } | |
| 1484 return delta; | |
| 1485 } | |
| 1486 | |
| 1487 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) | |
| 1488 { | |
| 1489 // Check for null or empty string to match against | |
| 1490 if (!matchString) | |
| 1491 return kNotFound; | |
| 1492 unsigned matchLength = matchString->length(); | |
| 1493 unsigned ourLength = length(); | |
| 1494 if (!matchLength) | |
| 1495 return min(index, ourLength); | |
| 1496 | |
| 1497 // Optimization 1: fast case for strings of length 1. | |
| 1498 if (matchLength == 1) { | |
| 1499 if (is8Bit()) | |
| 1500 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0],
index); | |
| 1501 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], in
dex); | |
| 1502 } | |
| 1503 | |
| 1504 // Check index & matchLength are in range. | |
| 1505 if (matchLength > ourLength) | |
| 1506 return kNotFound; | |
| 1507 | |
| 1508 if (is8Bit()) { | |
| 1509 if (matchString->is8Bit()) | |
| 1510 return reverseFindInner(characters8(), matchString->characters8(), i
ndex, ourLength, matchLength); | |
| 1511 return reverseFindInner(characters8(), matchString->characters16(), inde
x, ourLength, matchLength); | |
| 1512 } | |
| 1513 | |
| 1514 if (matchString->is8Bit()) | |
| 1515 return reverseFindInner(characters16(), matchString->characters8(), inde
x, ourLength, matchLength); | |
| 1516 | |
| 1517 return reverseFindInner(characters16(), matchString->characters16(), index,
ourLength, matchLength); | |
| 1518 } | |
| 1519 | |
| 1520 ALWAYS_INLINE static bool equalSubstring(const StringImpl* stringImpl, unsigned
startOffset, const LChar* matchString, unsigned matchLength) | |
| 1521 { | |
| 1522 ASSERT(stringImpl); | |
| 1523 ASSERT(matchLength <= stringImpl->length()); | |
| 1524 ASSERT(startOffset + matchLength <= stringImpl->length()); | |
| 1525 | |
| 1526 if (stringImpl->is8Bit()) | |
| 1527 return equal(stringImpl->characters8() + startOffset, matchString, match
Length); | |
| 1528 return equal(stringImpl->characters16() + startOffset, matchString, matchLen
gth); | |
| 1529 } | |
| 1530 | |
| 1531 bool StringImpl::startsWith(UChar character) const | |
| 1532 { | |
| 1533 return m_length && (*this)[0] == character; | |
| 1534 } | |
| 1535 | |
| 1536 bool StringImpl::startsWith(const char* prefixString, unsigned prefixLength) con
st | |
| 1537 { | |
| 1538 ASSERT(prefixLength); | |
| 1539 if (prefixLength > length()) | |
| 1540 return false; | |
| 1541 return equalSubstring(this, 0, reinterpret_cast<const LChar*>(prefixString),
prefixLength); | |
| 1542 } | |
| 1543 | |
| 1544 ALWAYS_INLINE static bool equalSubstring(const StringImpl* stringImpl, unsigned
startOffset, const StringImpl* matchString) | |
| 1545 { | |
| 1546 ASSERT(stringImpl); | |
| 1547 ASSERT(matchString); | |
| 1548 ASSERT(matchString->length() <= stringImpl->length()); | |
| 1549 ASSERT(startOffset + matchString->length() <= stringImpl->length()); | |
| 1550 | |
| 1551 unsigned matchLength = matchString->length(); | |
| 1552 if (matchString->is8Bit()) | |
| 1553 return equalSubstring(stringImpl, startOffset, matchString->characters8(
), matchLength); | |
| 1554 if (stringImpl->is8Bit()) | |
| 1555 return equal(stringImpl->characters8() + startOffset, matchString->chara
cters16(), matchLength); | |
| 1556 return equal(stringImpl->characters16() + startOffset, matchString->characte
rs16(), matchLength); | |
| 1557 } | |
| 1558 | |
| 1559 bool StringImpl::startsWith(const StringImpl* prefix) const | |
| 1560 { | |
| 1561 ASSERT(prefix); | |
| 1562 if (prefix->length() > length()) | |
| 1563 return false; | |
| 1564 return equalSubstring(this, 0, prefix); | |
| 1565 } | |
| 1566 | |
| 1567 ALWAYS_INLINE static bool equalSubstringIgnoringCase(const StringImpl* stringImp
l, unsigned startOffset, const LChar* matchString, unsigned matchLength) | |
| 1568 { | |
| 1569 ASSERT(stringImpl); | |
| 1570 ASSERT(matchLength <= stringImpl->length()); | |
| 1571 ASSERT(startOffset + matchLength <= stringImpl->length()); | |
| 1572 | |
| 1573 if (stringImpl->is8Bit()) | |
| 1574 return equalIgnoringCase(stringImpl->characters8() + startOffset, matchS
tring, matchLength); | |
| 1575 return equalIgnoringCase(stringImpl->characters16() + startOffset, matchStri
ng, matchLength); | |
| 1576 } | |
| 1577 | |
| 1578 bool StringImpl::startsWithIgnoringCase(const char* prefixString, unsigned prefi
xLength) const | |
| 1579 { | |
| 1580 ASSERT(prefixLength); | |
| 1581 if (prefixLength > length()) | |
| 1582 return false; | |
| 1583 return equalSubstringIgnoringCase(this, 0, reinterpret_cast<const LChar*>(pr
efixString), prefixLength); | |
| 1584 } | |
| 1585 | |
| 1586 ALWAYS_INLINE static bool equalSubstringIgnoringCase(const StringImpl* stringImp
l, unsigned startOffset, const StringImpl* matchString) | |
| 1587 { | |
| 1588 ASSERT(stringImpl); | |
| 1589 ASSERT(matchString); | |
| 1590 ASSERT(matchString->length() <= stringImpl->length()); | |
| 1591 ASSERT(startOffset + matchString->length() <= stringImpl->length()); | |
| 1592 | |
| 1593 unsigned matchLength = matchString->length(); | |
| 1594 if (matchString->is8Bit()) | |
| 1595 return equalSubstringIgnoringCase(stringImpl, startOffset, matchString->
characters8(), matchLength); | |
| 1596 if (stringImpl->is8Bit()) | |
| 1597 return equalIgnoringCase(stringImpl->characters8() + startOffset, matchS
tring->characters16(), matchLength); | |
| 1598 return equalIgnoringCase(stringImpl->characters16() + startOffset, matchStri
ng->characters16(), matchLength); | |
| 1599 } | |
| 1600 | |
| 1601 bool StringImpl::startsWithIgnoringCase(const StringImpl* prefix) const | |
| 1602 { | |
| 1603 ASSERT(prefix); | |
| 1604 if (prefix->length() > length()) | |
| 1605 return false; | |
| 1606 return equalSubstringIgnoringCase(this, 0, prefix); | |
| 1607 } | |
| 1608 | |
| 1609 ALWAYS_INLINE static bool equalSubstringIgnoringASCIICase(const StringImpl* stri
ngImpl, unsigned startOffset, const LChar* matchString, unsigned matchLength) | |
| 1610 { | |
| 1611 ASSERT(stringImpl); | |
| 1612 ASSERT(matchLength <= stringImpl->length()); | |
| 1613 ASSERT(startOffset + matchLength <= stringImpl->length()); | |
| 1614 | |
| 1615 if (stringImpl->is8Bit()) | |
| 1616 return equalIgnoringASCIICase(stringImpl->characters8() + startOffset, m
atchString, matchLength); | |
| 1617 return equalIgnoringASCIICase(stringImpl->characters16() + startOffset, matc
hString, matchLength); | |
| 1618 } | |
| 1619 | |
| 1620 bool StringImpl::startsWithIgnoringASCIICase(const char* prefixString, unsigned
prefixLength) const | |
| 1621 { | |
| 1622 ASSERT(prefixLength); | |
| 1623 if (prefixLength > length()) | |
| 1624 return false; | |
| 1625 return equalSubstringIgnoringASCIICase(this, 0, reinterpret_cast<const LChar
*>(prefixString), prefixLength); | |
| 1626 } | |
| 1627 | |
| 1628 ALWAYS_INLINE static bool equalSubstringIgnoringASCIICase(const StringImpl* stri
ngImpl, unsigned startOffset, const StringImpl* matchString) | |
| 1629 { | |
| 1630 ASSERT(stringImpl); | |
| 1631 ASSERT(matchString); | |
| 1632 ASSERT(matchString->length() <= stringImpl->length()); | |
| 1633 ASSERT(startOffset + matchString->length() <= stringImpl->length()); | |
| 1634 | |
| 1635 unsigned matchLength = matchString->length(); | |
| 1636 if (matchString->is8Bit()) | |
| 1637 return equalSubstringIgnoringASCIICase(stringImpl, startOffset, matchStr
ing->characters8(), matchLength); | |
| 1638 if (stringImpl->is8Bit()) | |
| 1639 return equalIgnoringASCIICase(stringImpl->characters8() + startOffset, m
atchString->characters16(), matchLength); | |
| 1640 return equalIgnoringASCIICase(stringImpl->characters16() + startOffset, matc
hString->characters16(), matchLength); | |
| 1641 } | |
| 1642 | |
| 1643 bool StringImpl::startsWithIgnoringASCIICase(const StringImpl* prefix) const | |
| 1644 { | |
| 1645 ASSERT(prefix); | |
| 1646 if (prefix->length() > length()) | |
| 1647 return false; | |
| 1648 return equalSubstringIgnoringASCIICase(this, 0, prefix); | |
| 1649 } | |
| 1650 | |
| 1651 bool StringImpl::endsWith(UChar character) const | |
| 1652 { | |
| 1653 return m_length && (*this)[m_length - 1] == character; | |
| 1654 } | |
| 1655 | |
| 1656 bool StringImpl::endsWith(const char* suffixString, unsigned suffixLength) const | |
| 1657 { | |
| 1658 ASSERT(suffixLength); | |
| 1659 if (suffixLength > length()) | |
| 1660 return false; | |
| 1661 return equalSubstring(this, length() - suffixLength, reinterpret_cast<const
LChar*>(suffixString), suffixLength); | |
| 1662 } | |
| 1663 | |
| 1664 bool StringImpl::endsWith(const StringImpl* suffix) const | |
| 1665 { | |
| 1666 ASSERT(suffix); | |
| 1667 unsigned suffixLength = suffix->length(); | |
| 1668 if (suffixLength > length()) | |
| 1669 return false; | |
| 1670 return equalSubstring(this, length() - suffixLength, suffix); | |
| 1671 } | |
| 1672 | |
| 1673 bool StringImpl::endsWithIgnoringCase(const char* suffixString, unsigned suffixL
ength) const | |
| 1674 { | |
| 1675 ASSERT(suffixLength); | |
| 1676 if (suffixLength > length()) | |
| 1677 return false; | |
| 1678 return equalSubstringIgnoringCase(this, length() - suffixLength, reinterpret
_cast<const LChar*>(suffixString), suffixLength); | |
| 1679 } | |
| 1680 | |
| 1681 bool StringImpl::endsWithIgnoringCase(const StringImpl* suffix) const | |
| 1682 { | |
| 1683 ASSERT(suffix); | |
| 1684 unsigned suffixLength = suffix->length(); | |
| 1685 if (suffixLength > length()) | |
| 1686 return false; | |
| 1687 return equalSubstringIgnoringCase(this, length() - suffixLength, suffix); | |
| 1688 } | |
| 1689 | |
| 1690 bool StringImpl::endsWithIgnoringASCIICase(const char* suffixString, unsigned su
ffixLength) const | |
| 1691 { | |
| 1692 ASSERT(suffixLength); | |
| 1693 if (suffixLength > length()) | |
| 1694 return false; | |
| 1695 return equalSubstringIgnoringASCIICase(this, length() - suffixLength, reinte
rpret_cast<const LChar*>(suffixString), suffixLength); | |
| 1696 } | |
| 1697 | |
| 1698 bool StringImpl::endsWithIgnoringASCIICase(const StringImpl* suffix) const | |
| 1699 { | |
| 1700 ASSERT(suffix); | |
| 1701 unsigned suffixLength = suffix->length(); | |
| 1702 if (suffixLength > length()) | |
| 1703 return false; | |
| 1704 return equalSubstringIgnoringASCIICase(this, length() - suffixLength, suffix
); | |
| 1705 } | |
| 1706 | |
| 1707 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) | |
| 1708 { | |
| 1709 if (oldC == newC) | |
| 1710 return this; | |
| 1711 | |
| 1712 if (find(oldC) == kNotFound) | |
| 1713 return this; | |
| 1714 | |
| 1715 unsigned i; | |
| 1716 if (is8Bit()) { | |
| 1717 if (newC <= 0xff) { | |
| 1718 LChar* data; | |
| 1719 LChar oldChar = static_cast<LChar>(oldC); | |
| 1720 LChar newChar = static_cast<LChar>(newC); | |
| 1721 | |
| 1722 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 1723 | |
| 1724 for (i = 0; i != m_length; ++i) { | |
| 1725 LChar ch = characters8()[i]; | |
| 1726 if (ch == oldChar) | |
| 1727 ch = newChar; | |
| 1728 data[i] = ch; | |
| 1729 } | |
| 1730 return newImpl.release(); | |
| 1731 } | |
| 1732 | |
| 1733 // There is the possibility we need to up convert from 8 to 16 bit, | |
| 1734 // create a 16 bit string for the result. | |
| 1735 UChar* data; | |
| 1736 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 1737 | |
| 1738 for (i = 0; i != m_length; ++i) { | |
| 1739 UChar ch = characters8()[i]; | |
| 1740 if (ch == oldC) | |
| 1741 ch = newC; | |
| 1742 data[i] = ch; | |
| 1743 } | |
| 1744 | |
| 1745 return newImpl.release(); | |
| 1746 } | |
| 1747 | |
| 1748 UChar* data; | |
| 1749 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
| 1750 | |
| 1751 for (i = 0; i != m_length; ++i) { | |
| 1752 UChar ch = characters16()[i]; | |
| 1753 if (ch == oldC) | |
| 1754 ch = newC; | |
| 1755 data[i] = ch; | |
| 1756 } | |
| 1757 return newImpl.release(); | |
| 1758 } | |
| 1759 | |
| 1760 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToR
eplace, StringImpl* str) | |
| 1761 { | |
| 1762 position = min(position, length()); | |
| 1763 lengthToReplace = min(lengthToReplace, length() - position); | |
| 1764 unsigned lengthToInsert = str ? str->length() : 0; | |
| 1765 if (!lengthToReplace && !lengthToInsert) | |
| 1766 return this; | |
| 1767 | |
| 1768 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max
() - lengthToInsert)); | |
| 1769 | |
| 1770 if (is8Bit() && (!str || str->is8Bit())) { | |
| 1771 LChar* data; | |
| 1772 RefPtr<StringImpl> newImpl = | |
| 1773 createUninitialized(length() - lengthToReplace + lengthToInsert, data); | |
| 1774 memcpy(data, characters8(), position * sizeof(LChar)); | |
| 1775 if (str) | |
| 1776 memcpy(data + position, str->characters8(), lengthToInsert * sizeof(
LChar)); | |
| 1777 memcpy(data + position + lengthToInsert, characters8() + position + leng
thToReplace, | |
| 1778 (length() - position - lengthToReplace) * sizeof(LChar)); | |
| 1779 return newImpl.release(); | |
| 1780 } | |
| 1781 UChar* data; | |
| 1782 RefPtr<StringImpl> newImpl = | 1886 RefPtr<StringImpl> newImpl = |
| 1783 createUninitialized(length() - lengthToReplace + lengthToInsert, data); | 1887 createUninitialized(length() - lengthToReplace + lengthToInsert, data); |
| 1784 if (is8Bit()) | 1888 memcpy(data, characters8(), position * sizeof(LChar)); |
| 1785 for (unsigned i = 0; i < position; ++i) | 1889 if (str) |
| 1786 data[i] = characters8()[i]; | 1890 memcpy(data + position, str->characters8(), |
| 1891 lengthToInsert * sizeof(LChar)); |
| 1892 memcpy(data + position + lengthToInsert, |
| 1893 characters8() + position + lengthToReplace, |
| 1894 (length() - position - lengthToReplace) * sizeof(LChar)); |
| 1895 return newImpl.release(); |
| 1896 } |
| 1897 UChar* data; |
| 1898 RefPtr<StringImpl> newImpl = |
| 1899 createUninitialized(length() - lengthToReplace + lengthToInsert, data); |
| 1900 if (is8Bit()) |
| 1901 for (unsigned i = 0; i < position; ++i) |
| 1902 data[i] = characters8()[i]; |
| 1903 else |
| 1904 memcpy(data, characters16(), position * sizeof(UChar)); |
| 1905 if (str) { |
| 1906 if (str->is8Bit()) |
| 1907 for (unsigned i = 0; i < lengthToInsert; ++i) |
| 1908 data[i + position] = str->characters8()[i]; |
| 1787 else | 1909 else |
| 1788 memcpy(data, characters16(), position * sizeof(UChar)); | 1910 memcpy(data + position, str->characters16(), |
| 1789 if (str) { | 1911 lengthToInsert * sizeof(UChar)); |
| 1790 if (str->is8Bit()) | 1912 } |
| 1791 for (unsigned i = 0; i < lengthToInsert; ++i) | 1913 if (is8Bit()) { |
| 1792 data[i + position] = str->characters8()[i]; | 1914 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i) |
| 1793 else | 1915 data[i + position + lengthToInsert] = |
| 1794 memcpy(data + position, str->characters16(), lengthToInsert * sizeof
(UChar)); | 1916 characters8()[i + position + lengthToReplace]; |
| 1795 } | 1917 } else { |
| 1796 if (is8Bit()) { | 1918 memcpy(data + position + lengthToInsert, |
| 1797 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i) | 1919 characters16() + position + lengthToReplace, |
| 1798 data[i + position + lengthToInsert] = characters8()[i + position + l
engthToReplace]; | 1920 (length() - position - lengthToReplace) * sizeof(UChar)); |
| 1799 } else { | 1921 } |
| 1800 memcpy(data + position + lengthToInsert, characters16() + position + len
gthToReplace, | 1922 return newImpl.release(); |
| 1801 (length() - position - lengthToReplace) * sizeof(UChar)); | 1923 } |
| 1802 } | 1924 |
| 1925 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, |
| 1926 StringImpl* replacement) { |
| 1927 if (!replacement) |
| 1928 return this; |
| 1929 |
| 1930 if (replacement->is8Bit()) |
| 1931 return replace(pattern, replacement->characters8(), replacement->length()); |
| 1932 |
| 1933 return replace(pattern, replacement->characters16(), replacement->length()); |
| 1934 } |
| 1935 |
| 1936 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, |
| 1937 const LChar* replacement, |
| 1938 unsigned repStrLength) { |
| 1939 ASSERT(replacement); |
| 1940 |
| 1941 size_t srcSegmentStart = 0; |
| 1942 unsigned matchCount = 0; |
| 1943 |
| 1944 // Count the matches. |
| 1945 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1946 ++matchCount; |
| 1947 ++srcSegmentStart; |
| 1948 } |
| 1949 |
| 1950 // If we have 0 matches then we don't have to do any more work. |
| 1951 if (!matchCount) |
| 1952 return this; |
| 1953 |
| 1954 RELEASE_ASSERT(!repStrLength || |
| 1955 matchCount <= numeric_limits<unsigned>::max() / repStrLength); |
| 1956 |
| 1957 unsigned replaceSize = matchCount * repStrLength; |
| 1958 unsigned newSize = m_length - matchCount; |
| 1959 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); |
| 1960 |
| 1961 newSize += replaceSize; |
| 1962 |
| 1963 // Construct the new data. |
| 1964 size_t srcSegmentEnd; |
| 1965 unsigned srcSegmentLength; |
| 1966 srcSegmentStart = 0; |
| 1967 unsigned dstOffset = 0; |
| 1968 |
| 1969 if (is8Bit()) { |
| 1970 LChar* data; |
| 1971 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
| 1972 |
| 1973 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1974 srcSegmentLength = srcSegmentEnd - srcSegmentStart; |
| 1975 memcpy(data + dstOffset, characters8() + srcSegmentStart, |
| 1976 srcSegmentLength * sizeof(LChar)); |
| 1977 dstOffset += srcSegmentLength; |
| 1978 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar)); |
| 1979 dstOffset += repStrLength; |
| 1980 srcSegmentStart = srcSegmentEnd + 1; |
| 1981 } |
| 1982 |
| 1983 srcSegmentLength = m_length - srcSegmentStart; |
| 1984 memcpy(data + dstOffset, characters8() + srcSegmentStart, |
| 1985 srcSegmentLength * sizeof(LChar)); |
| 1986 |
| 1987 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 1988 |
| 1803 return newImpl.release(); | 1989 return newImpl.release(); |
| 1804 } | 1990 } |
| 1805 | 1991 |
| 1806 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen
t) | 1992 UChar* data; |
| 1807 { | 1993 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
| 1808 if (!replacement) | 1994 |
| 1809 return this; | 1995 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1810 | 1996 srcSegmentLength = srcSegmentEnd - srcSegmentStart; |
| 1811 if (replacement->is8Bit()) | 1997 memcpy(data + dstOffset, characters16() + srcSegmentStart, |
| 1812 return replace(pattern, replacement->characters8(), replacement->length(
)); | 1998 srcSegmentLength * sizeof(UChar)); |
| 1813 | 1999 |
| 1814 return replace(pattern, replacement->characters16(), replacement->length()); | 2000 dstOffset += srcSegmentLength; |
| 1815 } | 2001 for (unsigned i = 0; i < repStrLength; ++i) |
| 1816 | 2002 data[i + dstOffset] = replacement[i]; |
| 1817 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replaceme
nt, unsigned repStrLength) | 2003 |
| 1818 { | 2004 dstOffset += repStrLength; |
| 1819 ASSERT(replacement); | 2005 srcSegmentStart = srcSegmentEnd + 1; |
| 1820 | 2006 } |
| 1821 size_t srcSegmentStart = 0; | 2007 |
| 1822 unsigned matchCount = 0; | 2008 srcSegmentLength = m_length - srcSegmentStart; |
| 1823 | 2009 memcpy(data + dstOffset, characters16() + srcSegmentStart, |
| 1824 // Count the matches. | 2010 srcSegmentLength * sizeof(UChar)); |
| 1825 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { | 2011 |
| 1826 ++matchCount; | 2012 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 1827 ++srcSegmentStart; | 2013 |
| 1828 } | 2014 return newImpl.release(); |
| 1829 | 2015 } |
| 1830 // If we have 0 matches then we don't have to do any more work. | 2016 |
| 1831 if (!matchCount) | 2017 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, |
| 1832 return this; | 2018 const UChar* replacement, |
| 1833 | 2019 unsigned repStrLength) { |
| 1834 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max(
) / repStrLength); | 2020 ASSERT(replacement); |
| 1835 | 2021 |
| 1836 unsigned replaceSize = matchCount * repStrLength; | 2022 size_t srcSegmentStart = 0; |
| 1837 unsigned newSize = m_length - matchCount; | 2023 unsigned matchCount = 0; |
| 1838 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); | 2024 |
| 1839 | 2025 // Count the matches. |
| 1840 newSize += replaceSize; | 2026 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1841 | 2027 ++matchCount; |
| 1842 // Construct the new data. | 2028 ++srcSegmentStart; |
| 1843 size_t srcSegmentEnd; | 2029 } |
| 1844 unsigned srcSegmentLength; | 2030 |
| 1845 srcSegmentStart = 0; | 2031 // If we have 0 matches then we don't have to do any more work. |
| 1846 unsigned dstOffset = 0; | 2032 if (!matchCount) |
| 1847 | 2033 return this; |
| 1848 if (is8Bit()) { | 2034 |
| 1849 LChar* data; | 2035 RELEASE_ASSERT(!repStrLength || |
| 1850 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | 2036 matchCount <= numeric_limits<unsigned>::max() / repStrLength); |
| 1851 | 2037 |
| 1852 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | 2038 unsigned replaceSize = matchCount * repStrLength; |
| 1853 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | 2039 unsigned newSize = m_length - matchCount; |
| 1854 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegment
Length * sizeof(LChar)); | 2040 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); |
| 1855 dstOffset += srcSegmentLength; | 2041 |
| 1856 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar)); | 2042 newSize += replaceSize; |
| 1857 dstOffset += repStrLength; | 2043 |
| 1858 srcSegmentStart = srcSegmentEnd + 1; | 2044 // Construct the new data. |
| 1859 } | 2045 size_t srcSegmentEnd; |
| 1860 | 2046 unsigned srcSegmentLength; |
| 1861 srcSegmentLength = m_length - srcSegmentStart; | 2047 srcSegmentStart = 0; |
| 1862 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLeng
th * sizeof(LChar)); | 2048 unsigned dstOffset = 0; |
| 1863 | 2049 |
| 1864 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | 2050 if (is8Bit()) { |
| 1865 | |
| 1866 return newImpl.release(); | |
| 1867 } | |
| 1868 | |
| 1869 UChar* data; | 2051 UChar* data; |
| 1870 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | 2052 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
| 1871 | 2053 |
| 1872 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | 2054 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1873 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | 2055 srcSegmentLength = srcSegmentEnd - srcSegmentStart; |
| 1874 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen
gth * sizeof(UChar)); | 2056 for (unsigned i = 0; i < srcSegmentLength; ++i) |
| 1875 | 2057 data[i + dstOffset] = characters8()[i + srcSegmentStart]; |
| 1876 dstOffset += srcSegmentLength; | 2058 |
| 1877 for (unsigned i = 0; i < repStrLength; ++i) | 2059 dstOffset += srcSegmentLength; |
| 1878 data[i + dstOffset] = replacement[i]; | 2060 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); |
| 1879 | 2061 |
| 1880 dstOffset += repStrLength; | 2062 dstOffset += repStrLength; |
| 1881 srcSegmentStart = srcSegmentEnd + 1; | 2063 srcSegmentStart = srcSegmentEnd + 1; |
| 1882 } | 2064 } |
| 1883 | 2065 |
| 1884 srcSegmentLength = m_length - srcSegmentStart; | 2066 srcSegmentLength = m_length - srcSegmentStart; |
| 1885 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength
* sizeof(UChar)); | 2067 for (unsigned i = 0; i < srcSegmentLength; ++i) |
| 2068 data[i + dstOffset] = characters8()[i + srcSegmentStart]; |
| 1886 | 2069 |
| 1887 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | 2070 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 1888 | 2071 |
| 1889 return newImpl.release(); | 2072 return newImpl.release(); |
| 1890 } | 2073 } |
| 1891 | 2074 |
| 1892 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replaceme
nt, unsigned repStrLength) | 2075 UChar* data; |
| 1893 { | 2076 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
| 1894 ASSERT(replacement); | 2077 |
| 1895 | 2078 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1896 size_t srcSegmentStart = 0; | 2079 srcSegmentLength = srcSegmentEnd - srcSegmentStart; |
| 1897 unsigned matchCount = 0; | 2080 memcpy(data + dstOffset, characters16() + srcSegmentStart, |
| 1898 | 2081 srcSegmentLength * sizeof(UChar)); |
| 1899 // Count the matches. | 2082 |
| 1900 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { | 2083 dstOffset += srcSegmentLength; |
| 1901 ++matchCount; | 2084 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); |
| 1902 ++srcSegmentStart; | 2085 |
| 1903 } | 2086 dstOffset += repStrLength; |
| 1904 | 2087 srcSegmentStart = srcSegmentEnd + 1; |
| 1905 // If we have 0 matches then we don't have to do any more work. | 2088 } |
| 1906 if (!matchCount) | 2089 |
| 1907 return this; | 2090 srcSegmentLength = m_length - srcSegmentStart; |
| 1908 | 2091 memcpy(data + dstOffset, characters16() + srcSegmentStart, |
| 1909 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max(
) / repStrLength); | 2092 srcSegmentLength * sizeof(UChar)); |
| 1910 | 2093 |
| 1911 unsigned replaceSize = matchCount * repStrLength; | 2094 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 1912 unsigned newSize = m_length - matchCount; | 2095 |
| 1913 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); | 2096 return newImpl.release(); |
| 1914 | 2097 } |
| 1915 newSize += replaceSize; | 2098 |
| 1916 | 2099 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, |
| 1917 // Construct the new data. | 2100 StringImpl* replacement) { |
| 1918 size_t srcSegmentEnd; | 2101 if (!pattern || !replacement) |
| 1919 unsigned srcSegmentLength; | 2102 return this; |
| 1920 srcSegmentStart = 0; | 2103 |
| 1921 unsigned dstOffset = 0; | 2104 unsigned patternLength = pattern->length(); |
| 1922 | 2105 if (!patternLength) |
| 1923 if (is8Bit()) { | 2106 return this; |
| 1924 UChar* data; | 2107 |
| 1925 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | 2108 unsigned repStrLength = replacement->length(); |
| 1926 | 2109 size_t srcSegmentStart = 0; |
| 1927 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | 2110 unsigned matchCount = 0; |
| 1928 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | 2111 |
| 1929 for (unsigned i = 0; i < srcSegmentLength; ++i) | 2112 // Count the matches. |
| 1930 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | 2113 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { |
| 1931 | 2114 ++matchCount; |
| 1932 dstOffset += srcSegmentLength; | 2115 srcSegmentStart += patternLength; |
| 1933 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); | 2116 } |
| 1934 | 2117 |
| 1935 dstOffset += repStrLength; | 2118 // If we have 0 matches, we don't have to do any more work |
| 1936 srcSegmentStart = srcSegmentEnd + 1; | 2119 if (!matchCount) |
| 1937 } | 2120 return this; |
| 1938 | 2121 |
| 1939 srcSegmentLength = m_length - srcSegmentStart; | 2122 unsigned newSize = m_length - matchCount * patternLength; |
| 1940 for (unsigned i = 0; i < srcSegmentLength; ++i) | 2123 RELEASE_ASSERT(!repStrLength || |
| 1941 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | 2124 matchCount <= numeric_limits<unsigned>::max() / repStrLength); |
| 1942 | 2125 |
| 1943 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | 2126 RELEASE_ASSERT(newSize <= |
| 1944 | 2127 (numeric_limits<unsigned>::max() - matchCount * repStrLength)); |
| 1945 return newImpl.release(); | 2128 |
| 1946 } | 2129 newSize += matchCount * repStrLength; |
| 1947 | 2130 |
| 1948 UChar* data; | 2131 // Construct the new data |
| 1949 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | 2132 size_t srcSegmentEnd; |
| 1950 | 2133 unsigned srcSegmentLength; |
| 1951 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | 2134 srcSegmentStart = 0; |
| 1952 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | 2135 unsigned dstOffset = 0; |
| 1953 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen
gth * sizeof(UChar)); | 2136 bool srcIs8Bit = is8Bit(); |
| 1954 | 2137 bool replacementIs8Bit = replacement->is8Bit(); |
| 1955 dstOffset += srcSegmentLength; | 2138 |
| 1956 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); | 2139 // There are 4 cases: |
| 1957 | 2140 // 1. This and replacement are both 8 bit. |
| 1958 dstOffset += repStrLength; | 2141 // 2. This and replacement are both 16 bit. |
| 1959 srcSegmentStart = srcSegmentEnd + 1; | 2142 // 3. This is 8 bit and replacement is 16 bit. |
| 1960 } | 2143 // 4. This is 16 bit and replacement is 8 bit. |
| 1961 | 2144 if (srcIs8Bit && replacementIs8Bit) { |
| 1962 srcSegmentLength = m_length - srcSegmentStart; | 2145 // Case 1 |
| 1963 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength
* sizeof(UChar)); | 2146 LChar* data; |
| 1964 | |
| 1965 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
| 1966 | |
| 1967 return newImpl.release(); | |
| 1968 } | |
| 1969 | |
| 1970 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl
acement) | |
| 1971 { | |
| 1972 if (!pattern || !replacement) | |
| 1973 return this; | |
| 1974 | |
| 1975 unsigned patternLength = pattern->length(); | |
| 1976 if (!patternLength) | |
| 1977 return this; | |
| 1978 | |
| 1979 unsigned repStrLength = replacement->length(); | |
| 1980 size_t srcSegmentStart = 0; | |
| 1981 unsigned matchCount = 0; | |
| 1982 | |
| 1983 // Count the matches. | |
| 1984 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 1985 ++matchCount; | |
| 1986 srcSegmentStart += patternLength; | |
| 1987 } | |
| 1988 | |
| 1989 // If we have 0 matches, we don't have to do any more work | |
| 1990 if (!matchCount) | |
| 1991 return this; | |
| 1992 | |
| 1993 unsigned newSize = m_length - matchCount * patternLength; | |
| 1994 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max(
) / repStrLength); | |
| 1995 | |
| 1996 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * re
pStrLength)); | |
| 1997 | |
| 1998 newSize += matchCount * repStrLength; | |
| 1999 | |
| 2000 | |
| 2001 // Construct the new data | |
| 2002 size_t srcSegmentEnd; | |
| 2003 unsigned srcSegmentLength; | |
| 2004 srcSegmentStart = 0; | |
| 2005 unsigned dstOffset = 0; | |
| 2006 bool srcIs8Bit = is8Bit(); | |
| 2007 bool replacementIs8Bit = replacement->is8Bit(); | |
| 2008 | |
| 2009 // There are 4 cases: | |
| 2010 // 1. This and replacement are both 8 bit. | |
| 2011 // 2. This and replacement are both 16 bit. | |
| 2012 // 3. This is 8 bit and replacement is 16 bit. | |
| 2013 // 4. This is 16 bit and replacement is 8 bit. | |
| 2014 if (srcIs8Bit && replacementIs8Bit) { | |
| 2015 // Case 1 | |
| 2016 LChar* data; | |
| 2017 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | |
| 2018 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | |
| 2019 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
| 2020 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegment
Length * sizeof(LChar)); | |
| 2021 dstOffset += srcSegmentLength; | |
| 2022 memcpy(data + dstOffset, replacement->characters8(), repStrLength *
sizeof(LChar)); | |
| 2023 dstOffset += repStrLength; | |
| 2024 srcSegmentStart = srcSegmentEnd + patternLength; | |
| 2025 } | |
| 2026 | |
| 2027 srcSegmentLength = m_length - srcSegmentStart; | |
| 2028 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLeng
th * sizeof(LChar)); | |
| 2029 | |
| 2030 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | |
| 2031 | |
| 2032 return newImpl.release(); | |
| 2033 } | |
| 2034 | |
| 2035 UChar* data; | |
| 2036 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); | 2147 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
| 2037 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { | 2148 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { |
| 2038 srcSegmentLength = srcSegmentEnd - srcSegmentStart; | 2149 srcSegmentLength = srcSegmentEnd - srcSegmentStart; |
| 2039 if (srcIs8Bit) { | 2150 memcpy(data + dstOffset, characters8() + srcSegmentStart, |
| 2040 // Case 3. | 2151 srcSegmentLength * sizeof(LChar)); |
| 2041 for (unsigned i = 0; i < srcSegmentLength; ++i) | 2152 dstOffset += srcSegmentLength; |
| 2042 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | 2153 memcpy(data + dstOffset, replacement->characters8(), |
| 2043 } else { | 2154 repStrLength * sizeof(LChar)); |
| 2044 // Case 2 & 4. | 2155 dstOffset += repStrLength; |
| 2045 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmen
tLength * sizeof(UChar)); | 2156 srcSegmentStart = srcSegmentEnd + patternLength; |
| 2046 } | |
| 2047 dstOffset += srcSegmentLength; | |
| 2048 if (replacementIs8Bit) { | |
| 2049 // Cases 2 & 3. | |
| 2050 for (unsigned i = 0; i < repStrLength; ++i) | |
| 2051 data[i + dstOffset] = replacement->characters8()[i]; | |
| 2052 } else { | |
| 2053 // Case 4 | |
| 2054 memcpy(data + dstOffset, replacement->characters16(), repStrLength *
sizeof(UChar)); | |
| 2055 } | |
| 2056 dstOffset += repStrLength; | |
| 2057 srcSegmentStart = srcSegmentEnd + patternLength; | |
| 2058 } | 2157 } |
| 2059 | 2158 |
| 2060 srcSegmentLength = m_length - srcSegmentStart; | 2159 srcSegmentLength = m_length - srcSegmentStart; |
| 2160 memcpy(data + dstOffset, characters8() + srcSegmentStart, |
| 2161 srcSegmentLength * sizeof(LChar)); |
| 2162 |
| 2163 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 2164 |
| 2165 return newImpl.release(); |
| 2166 } |
| 2167 |
| 2168 UChar* data; |
| 2169 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
| 2170 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { |
| 2171 srcSegmentLength = srcSegmentEnd - srcSegmentStart; |
| 2061 if (srcIs8Bit) { | 2172 if (srcIs8Bit) { |
| 2062 // Case 3. | 2173 // Case 3. |
| 2063 for (unsigned i = 0; i < srcSegmentLength; ++i) | 2174 for (unsigned i = 0; i < srcSegmentLength; ++i) |
| 2064 data[i + dstOffset] = characters8()[i + srcSegmentStart]; | 2175 data[i + dstOffset] = characters8()[i + srcSegmentStart]; |
| 2065 } else { | 2176 } else { |
| 2066 // Cases 2 & 4. | 2177 // Case 2 & 4. |
| 2067 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen
gth * sizeof(UChar)); | 2178 memcpy(data + dstOffset, characters16() + srcSegmentStart, |
| 2068 } | 2179 srcSegmentLength * sizeof(UChar)); |
| 2069 | 2180 } |
| 2070 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | 2181 dstOffset += srcSegmentLength; |
| 2071 | 2182 if (replacementIs8Bit) { |
| 2072 return newImpl.release(); | 2183 // Cases 2 & 3. |
| 2073 } | 2184 for (unsigned i = 0; i < repStrLength; ++i) |
| 2074 | 2185 data[i + dstOffset] = replacement->characters8()[i]; |
| 2075 PassRefPtr<StringImpl> StringImpl::upconvertedString() | 2186 } else { |
| 2076 { | 2187 // Case 4 |
| 2077 if (is8Bit()) | 2188 memcpy(data + dstOffset, replacement->characters16(), |
| 2078 return String::make16BitFrom8BitSource(characters8(), m_length).releaseI
mpl(); | 2189 repStrLength * sizeof(UChar)); |
| 2079 return this; | 2190 } |
| 2080 } | 2191 dstOffset += repStrLength; |
| 2081 | 2192 srcSegmentStart = srcSegmentEnd + patternLength; |
| 2082 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl*
b) | 2193 } |
| 2083 { | 2194 |
| 2084 unsigned aLength = a->length(); | 2195 srcSegmentLength = m_length - srcSegmentStart; |
| 2085 unsigned bLength = b->length(); | 2196 if (srcIs8Bit) { |
| 2086 if (aLength != bLength) | 2197 // Case 3. |
| 2198 for (unsigned i = 0; i < srcSegmentLength; ++i) |
| 2199 data[i + dstOffset] = characters8()[i + srcSegmentStart]; |
| 2200 } else { |
| 2201 // Cases 2 & 4. |
| 2202 memcpy(data + dstOffset, characters16() + srcSegmentStart, |
| 2203 srcSegmentLength * sizeof(UChar)); |
| 2204 } |
| 2205 |
| 2206 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 2207 |
| 2208 return newImpl.release(); |
| 2209 } |
| 2210 |
| 2211 PassRefPtr<StringImpl> StringImpl::upconvertedString() { |
| 2212 if (is8Bit()) |
| 2213 return String::make16BitFrom8BitSource(characters8(), m_length) |
| 2214 .releaseImpl(); |
| 2215 return this; |
| 2216 } |
| 2217 |
| 2218 static inline bool stringImplContentEqual(const StringImpl* a, |
| 2219 const StringImpl* b) { |
| 2220 unsigned aLength = a->length(); |
| 2221 unsigned bLength = b->length(); |
| 2222 if (aLength != bLength) |
| 2223 return false; |
| 2224 |
| 2225 if (a->is8Bit()) { |
| 2226 if (b->is8Bit()) |
| 2227 return equal(a->characters8(), b->characters8(), aLength); |
| 2228 |
| 2229 return equal(a->characters8(), b->characters16(), aLength); |
| 2230 } |
| 2231 |
| 2232 if (b->is8Bit()) |
| 2233 return equal(a->characters16(), b->characters8(), aLength); |
| 2234 |
| 2235 return equal(a->characters16(), b->characters16(), aLength); |
| 2236 } |
| 2237 |
| 2238 bool equal(const StringImpl* a, const StringImpl* b) { |
| 2239 if (a == b) |
| 2240 return true; |
| 2241 if (!a || !b) |
| 2242 return false; |
| 2243 if (a->isAtomic() && b->isAtomic()) |
| 2244 return false; |
| 2245 |
| 2246 return stringImplContentEqual(a, b); |
| 2247 } |
| 2248 |
| 2249 template <typename CharType> |
| 2250 inline bool equalInternal(const StringImpl* a, |
| 2251 const CharType* b, |
| 2252 unsigned length) { |
| 2253 if (!a) |
| 2254 return !b; |
| 2255 if (!b) |
| 2256 return false; |
| 2257 |
| 2258 if (a->length() != length) |
| 2259 return false; |
| 2260 if (a->is8Bit()) |
| 2261 return equal(a->characters8(), b, length); |
| 2262 return equal(a->characters16(), b, length); |
| 2263 } |
| 2264 |
| 2265 bool equal(const StringImpl* a, const LChar* b, unsigned length) { |
| 2266 return equalInternal(a, b, length); |
| 2267 } |
| 2268 |
| 2269 bool equal(const StringImpl* a, const UChar* b, unsigned length) { |
| 2270 return equalInternal(a, b, length); |
| 2271 } |
| 2272 |
| 2273 bool equal(const StringImpl* a, const LChar* b) { |
| 2274 if (!a) |
| 2275 return !b; |
| 2276 if (!b) |
| 2277 return !a; |
| 2278 |
| 2279 unsigned length = a->length(); |
| 2280 |
| 2281 if (a->is8Bit()) { |
| 2282 const LChar* aPtr = a->characters8(); |
| 2283 for (unsigned i = 0; i != length; ++i) { |
| 2284 LChar bc = b[i]; |
| 2285 LChar ac = aPtr[i]; |
| 2286 if (!bc) |
| 2087 return false; | 2287 return false; |
| 2088 | 2288 if (ac != bc) |
| 2089 if (a->is8Bit()) { | |
| 2090 if (b->is8Bit()) | |
| 2091 return equal(a->characters8(), b->characters8(), aLength); | |
| 2092 | |
| 2093 return equal(a->characters8(), b->characters16(), aLength); | |
| 2094 } | |
| 2095 | |
| 2096 if (b->is8Bit()) | |
| 2097 return equal(a->characters16(), b->characters8(), aLength); | |
| 2098 | |
| 2099 return equal(a->characters16(), b->characters16(), aLength); | |
| 2100 } | |
| 2101 | |
| 2102 bool equal(const StringImpl* a, const StringImpl* b) | |
| 2103 { | |
| 2104 if (a == b) | |
| 2105 return true; | |
| 2106 if (!a || !b) | |
| 2107 return false; | 2289 return false; |
| 2108 if (a->isAtomic() && b->isAtomic()) | 2290 } |
| 2291 |
| 2292 return !b[length]; |
| 2293 } |
| 2294 |
| 2295 const UChar* aPtr = a->characters16(); |
| 2296 for (unsigned i = 0; i != length; ++i) { |
| 2297 LChar bc = b[i]; |
| 2298 if (!bc) |
| 2299 return false; |
| 2300 if (aPtr[i] != bc) |
| 2301 return false; |
| 2302 } |
| 2303 |
| 2304 return !b[length]; |
| 2305 } |
| 2306 |
| 2307 bool equalNonNull(const StringImpl* a, const StringImpl* b) { |
| 2308 ASSERT(a && b); |
| 2309 if (a == b) |
| 2310 return true; |
| 2311 |
| 2312 return stringImplContentEqual(a, b); |
| 2313 } |
| 2314 |
| 2315 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b) { |
| 2316 if (a == b) |
| 2317 return true; |
| 2318 if (!a || !b) |
| 2319 return false; |
| 2320 |
| 2321 return CaseFoldingHash::equal(a, b); |
| 2322 } |
| 2323 |
| 2324 bool equalIgnoringCase(const StringImpl* a, const LChar* b) { |
| 2325 if (!a) |
| 2326 return !b; |
| 2327 if (!b) |
| 2328 return !a; |
| 2329 |
| 2330 unsigned length = a->length(); |
| 2331 |
| 2332 // Do a faster loop for the case where all the characters are ASCII. |
| 2333 UChar ored = 0; |
| 2334 bool equal = true; |
| 2335 if (a->is8Bit()) { |
| 2336 const LChar* as = a->characters8(); |
| 2337 for (unsigned i = 0; i != length; ++i) { |
| 2338 LChar bc = b[i]; |
| 2339 if (!bc) |
| 2109 return false; | 2340 return false; |
| 2110 | 2341 UChar ac = as[i]; |
| 2111 return stringImplContentEqual(a, b); | 2342 ored |= ac; |
| 2112 } | 2343 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); |
| 2113 | |
| 2114 template <typename CharType> | |
| 2115 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned lengt
h) | |
| 2116 { | |
| 2117 if (!a) | |
| 2118 return !b; | |
| 2119 if (!b) | |
| 2120 return false; | |
| 2121 | |
| 2122 if (a->length() != length) | |
| 2123 return false; | |
| 2124 if (a->is8Bit()) | |
| 2125 return equal(a->characters8(), b, length); | |
| 2126 return equal(a->characters16(), b, length); | |
| 2127 } | |
| 2128 | |
| 2129 bool equal(const StringImpl* a, const LChar* b, unsigned length) | |
| 2130 { | |
| 2131 return equalInternal(a, b, length); | |
| 2132 } | |
| 2133 | |
| 2134 bool equal(const StringImpl* a, const UChar* b, unsigned length) | |
| 2135 { | |
| 2136 return equalInternal(a, b, length); | |
| 2137 } | |
| 2138 | |
| 2139 bool equal(const StringImpl* a, const LChar* b) | |
| 2140 { | |
| 2141 if (!a) | |
| 2142 return !b; | |
| 2143 if (!b) | |
| 2144 return !a; | |
| 2145 | |
| 2146 unsigned length = a->length(); | |
| 2147 | |
| 2148 if (a->is8Bit()) { | |
| 2149 const LChar* aPtr = a->characters8(); | |
| 2150 for (unsigned i = 0; i != length; ++i) { | |
| 2151 LChar bc = b[i]; | |
| 2152 LChar ac = aPtr[i]; | |
| 2153 if (!bc) | |
| 2154 return false; | |
| 2155 if (ac != bc) | |
| 2156 return false; | |
| 2157 } | |
| 2158 | |
| 2159 return !b[length]; | |
| 2160 } | |
| 2161 | |
| 2162 const UChar* aPtr = a->characters16(); | |
| 2163 for (unsigned i = 0; i != length; ++i) { | |
| 2164 LChar bc = b[i]; | |
| 2165 if (!bc) | |
| 2166 return false; | |
| 2167 if (aPtr[i] != bc) | |
| 2168 return false; | |
| 2169 } | |
| 2170 | |
| 2171 return !b[length]; | |
| 2172 } | |
| 2173 | |
| 2174 bool equalNonNull(const StringImpl* a, const StringImpl* b) | |
| 2175 { | |
| 2176 ASSERT(a && b); | |
| 2177 if (a == b) | |
| 2178 return true; | |
| 2179 | |
| 2180 return stringImplContentEqual(a, b); | |
| 2181 } | |
| 2182 | |
| 2183 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b) | |
| 2184 { | |
| 2185 if (a == b) | |
| 2186 return true; | |
| 2187 if (!a || !b) | |
| 2188 return false; | |
| 2189 | |
| 2190 return CaseFoldingHash::equal(a, b); | |
| 2191 } | |
| 2192 | |
| 2193 bool equalIgnoringCase(const StringImpl* a, const LChar* b) | |
| 2194 { | |
| 2195 if (!a) | |
| 2196 return !b; | |
| 2197 if (!b) | |
| 2198 return !a; | |
| 2199 | |
| 2200 unsigned length = a->length(); | |
| 2201 | |
| 2202 // Do a faster loop for the case where all the characters are ASCII. | |
| 2203 UChar ored = 0; | |
| 2204 bool equal = true; | |
| 2205 if (a->is8Bit()) { | |
| 2206 const LChar* as = a->characters8(); | |
| 2207 for (unsigned i = 0; i != length; ++i) { | |
| 2208 LChar bc = b[i]; | |
| 2209 if (!bc) | |
| 2210 return false; | |
| 2211 UChar ac = as[i]; | |
| 2212 ored |= ac; | |
| 2213 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); | |
| 2214 } | |
| 2215 | |
| 2216 // Do a slower implementation for cases that include non-ASCII character
s. | |
| 2217 if (ored & ~0x7F) { | |
| 2218 equal = true; | |
| 2219 for (unsigned i = 0; i != length; ++i) | |
| 2220 equal = equal && (foldCase(as[i]) == foldCase(b[i])); | |
| 2221 } | |
| 2222 | |
| 2223 return equal && !b[length]; | |
| 2224 } | |
| 2225 | |
| 2226 const UChar* as = a->characters16(); | |
| 2227 for (unsigned i = 0; i != length; ++i) { | |
| 2228 LChar bc = b[i]; | |
| 2229 if (!bc) | |
| 2230 return false; | |
| 2231 UChar ac = as[i]; | |
| 2232 ored |= ac; | |
| 2233 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); | |
| 2234 } | 2344 } |
| 2235 | 2345 |
| 2236 // Do a slower implementation for cases that include non-ASCII characters. | 2346 // Do a slower implementation for cases that include non-ASCII characters. |
| 2237 if (ored & ~0x7F) { | 2347 if (ored & ~0x7F) { |
| 2238 equal = true; | 2348 equal = true; |
| 2239 for (unsigned i = 0; i != length; ++i) { | 2349 for (unsigned i = 0; i != length; ++i) |
| 2240 equal = equal && (foldCase(as[i]) == foldCase(b[i])); | 2350 equal = equal && (foldCase(as[i]) == foldCase(b[i])); |
| 2241 } | |
| 2242 } | 2351 } |
| 2243 | 2352 |
| 2244 return equal && !b[length]; | 2353 return equal && !b[length]; |
| 2245 } | 2354 } |
| 2246 | 2355 |
| 2247 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) | 2356 const UChar* as = a->characters16(); |
| 2248 { | 2357 for (unsigned i = 0; i != length; ++i) { |
| 2249 ASSERT(a && b); | 2358 LChar bc = b[i]; |
| 2250 if (a == b) | 2359 if (!bc) |
| 2251 return true; | 2360 return false; |
| 2252 | 2361 UChar ac = as[i]; |
| 2253 unsigned length = a->length(); | 2362 ored |= ac; |
| 2254 if (length != b->length()) | 2363 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); |
| 2255 return false; | 2364 } |
| 2256 | 2365 |
| 2257 if (a->is8Bit()) { | 2366 // Do a slower implementation for cases that include non-ASCII characters. |
| 2258 if (b->is8Bit()) | 2367 if (ored & ~0x7F) { |
| 2259 return equalIgnoringCase(a->characters8(), b->characters8(), length)
; | 2368 equal = true; |
| 2260 | 2369 for (unsigned i = 0; i != length; ++i) { |
| 2261 return equalIgnoringCase(b->characters16(), a->characters8(), length); | 2370 equal = equal && (foldCase(as[i]) == foldCase(b[i])); |
| 2262 } | 2371 } |
| 2263 | 2372 } |
| 2373 |
| 2374 return equal && !b[length]; |
| 2375 } |
| 2376 |
| 2377 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) { |
| 2378 ASSERT(a && b); |
| 2379 if (a == b) |
| 2380 return true; |
| 2381 |
| 2382 unsigned length = a->length(); |
| 2383 if (length != b->length()) |
| 2384 return false; |
| 2385 |
| 2386 if (a->is8Bit()) { |
| 2264 if (b->is8Bit()) | 2387 if (b->is8Bit()) |
| 2265 return equalIgnoringCase(a->characters16(), b->characters8(), length); | 2388 return equalIgnoringCase(a->characters8(), b->characters8(), length); |
| 2266 | 2389 |
| 2267 return equalIgnoringCase(a->characters16(), b->characters16(), length); | 2390 return equalIgnoringCase(b->characters16(), a->characters8(), length); |
| 2268 } | 2391 } |
| 2269 | 2392 |
| 2270 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) | 2393 if (b->is8Bit()) |
| 2271 { | 2394 return equalIgnoringCase(a->characters16(), b->characters8(), length); |
| 2272 if (!a && b && !b->length()) | 2395 |
| 2273 return true; | 2396 return equalIgnoringCase(a->characters16(), b->characters16(), length); |
| 2274 if (!b && a && !a->length()) | 2397 } |
| 2275 return true; | 2398 |
| 2276 return equal(a, b); | 2399 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) { |
| 2277 } | 2400 if (!a && b && !b->length()) |
| 2278 | 2401 return true; |
| 2279 size_t StringImpl::sizeInBytes() const | 2402 if (!b && a && !a->length()) |
| 2280 { | 2403 return true; |
| 2281 size_t size = length(); | 2404 return equal(a, b); |
| 2282 if (!is8Bit()) | 2405 } |
| 2283 size *= 2; | 2406 |
| 2284 return size + sizeof(*this); | 2407 size_t StringImpl::sizeInBytes() const { |
| 2285 } | 2408 size_t size = length(); |
| 2286 | 2409 if (!is8Bit()) |
| 2287 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) | 2410 size *= 2; |
| 2288 { | 2411 return size + sizeof(*this); |
| 2289 if (!localeIdentifier.isNull()) { | 2412 } |
| 2290 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(l
ocaleIdentifier, "az")) { | 2413 |
| 2291 if (c == 'i') | 2414 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) { |
| 2292 return latinCapitalLetterIWithDotAbove; | 2415 if (!localeIdentifier.isNull()) { |
| 2293 if (c == latinSmallLetterDotlessI) | 2416 if (localeIdMatchesLang(localeIdentifier, "tr") || |
| 2294 return 'I'; | 2417 localeIdMatchesLang(localeIdentifier, "az")) { |
| 2295 } else if (localeIdMatchesLang(localeIdentifier, "lt")) { | 2418 if (c == 'i') |
| 2296 // TODO(rob.buis) implement upper-casing rules for lt | 2419 return latinCapitalLetterIWithDotAbove; |
| 2297 // like in StringImpl::upper(locale). | 2420 if (c == latinSmallLetterDotlessI) |
| 2298 } | 2421 return 'I'; |
| 2299 } | 2422 } else if (localeIdMatchesLang(localeIdentifier, "lt")) { |
| 2300 | 2423 // TODO(rob.buis) implement upper-casing rules for lt |
| 2301 return toUpper(c); | 2424 // like in StringImpl::upper(locale). |
| 2302 } | 2425 } |
| 2303 | 2426 } |
| 2304 } // namespace WTF | 2427 |
| 2428 return toUpper(c); |
| 2429 } |
| 2430 |
| 2431 } // namespace WTF |
| OLD | NEW |