| OLD | NEW |
| 1 /* | 1 /* |
| 2 * (C) 1999 Lars Knoll (knoll@kde.org) | 2 * (C) 1999 Lars Knoll (knoll@kde.org) |
| 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights
reserved. | 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights
reserved. |
| 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. | 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. |
| 5 * | 5 * |
| 6 * This library is free software; you can redistribute it and/or | 6 * This library is free software; you can redistribute it and/or |
| 7 * modify it under the terms of the GNU Library General Public | 7 * modify it under the terms of the GNU Library General Public |
| 8 * License as published by the Free Software Foundation; either | 8 * License as published by the Free Software Foundation; either |
| 9 * version 2 of the License, or (at your option) any later version. | 9 * version 2 of the License, or (at your option) any later version. |
| 10 * | 10 * |
| (...skipping 24 matching lines...) Expand all Loading... |
| 35 #include "wtf/text/Unicode.h" | 35 #include "wtf/text/Unicode.h" |
| 36 #include <algorithm> | 36 #include <algorithm> |
| 37 #include <stdarg.h> | 37 #include <stdarg.h> |
| 38 | 38 |
| 39 namespace WTF { | 39 namespace WTF { |
| 40 | 40 |
| 41 using namespace Unicode; | 41 using namespace Unicode; |
| 42 | 42 |
| 43 // Construct a string with UTF-16 data. | 43 // Construct a string with UTF-16 data. |
| 44 String::String(const UChar* characters, unsigned length) | 44 String::String(const UChar* characters, unsigned length) |
| 45 : m_impl(characters ? StringImpl::create(characters, length) : nullptr) | 45 : m_impl(characters ? StringImpl::create(characters, length) : nullptr) {} |
| 46 { | |
| 47 } | |
| 48 | 46 |
| 49 // Construct a string with UTF-16 data, from a null-terminated source. | 47 // Construct a string with UTF-16 data, from a null-terminated source. |
| 50 String::String(const UChar* str) | 48 String::String(const UChar* str) { |
| 51 { | 49 if (!str) |
| 52 if (!str) | 50 return; |
| 53 return; | 51 m_impl = StringImpl::create(str, lengthOfNullTerminatedString(str)); |
| 54 m_impl = StringImpl::create(str, lengthOfNullTerminatedString(str)); | |
| 55 } | 52 } |
| 56 | 53 |
| 57 // Construct a string with latin1 data. | 54 // Construct a string with latin1 data. |
| 58 String::String(const LChar* characters, unsigned length) | 55 String::String(const LChar* characters, unsigned length) |
| 59 : m_impl(characters ? StringImpl::create(characters, length) : nullptr) | 56 : m_impl(characters ? StringImpl::create(characters, length) : nullptr) {} |
| 60 { | |
| 61 } | |
| 62 | 57 |
| 63 String::String(const char* characters, unsigned length) | 58 String::String(const char* characters, unsigned length) |
| 64 : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(char
acters), length) : nullptr) | 59 : m_impl(characters ? StringImpl::create( |
| 65 { | 60 reinterpret_cast<const LChar*>(characters), |
| 66 } | 61 length) |
| 62 : nullptr) {} |
| 67 | 63 |
| 68 // Construct a string with latin1 data, from a null-terminated source. | 64 // Construct a string with latin1 data, from a null-terminated source. |
| 69 String::String(const LChar* characters) | 65 String::String(const LChar* characters) |
| 70 : m_impl(characters ? StringImpl::create(characters) : nullptr) | 66 : m_impl(characters ? StringImpl::create(characters) : nullptr) {} |
| 71 { | |
| 72 } | |
| 73 | 67 |
| 74 String::String(const char* characters) | 68 String::String(const char* characters) |
| 75 : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(char
acters)) : nullptr) | 69 : m_impl(characters ? StringImpl::create( |
| 76 { | 70 reinterpret_cast<const LChar*>(characters)) |
| 77 } | 71 : nullptr) {} |
| 78 | 72 |
| 79 void String::append(const String& string) | 73 void String::append(const String& string) { |
| 80 { | 74 if (string.isEmpty()) |
| 81 if (string.isEmpty()) | 75 return; |
| 82 return; | 76 if (!m_impl) { |
| 83 if (!m_impl) { | 77 m_impl = string.m_impl; |
| 84 m_impl = string.m_impl; | 78 return; |
| 85 return; | 79 } |
| 86 } | 80 |
| 87 | 81 // FIXME: This is extremely inefficient. So much so that we might want to |
| 88 // FIXME: This is extremely inefficient. So much so that we might want to | 82 // take this out of String's API. We can make it better by optimizing the |
| 89 // take this out of String's API. We can make it better by optimizing the | 83 // case where exactly one String is pointing at this StringImpl, but even |
| 90 // case where exactly one String is pointing at this StringImpl, but even | 84 // then it's going to require a call into the allocator every single time. |
| 91 // then it's going to require a call into the allocator every single time. | 85 |
| 92 | 86 if (m_impl->is8Bit() && string.m_impl->is8Bit()) { |
| 93 if (m_impl->is8Bit() && string.m_impl->is8Bit()) { | 87 LChar* data; |
| 94 LChar* data; | 88 RELEASE_ASSERT(string.length() <= |
| 95 RELEASE_ASSERT(string.length() <= std::numeric_limits<unsigned>::max() -
m_impl->length()); | 89 std::numeric_limits<unsigned>::max() - m_impl->length()); |
| 96 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->len
gth() + string.length(), data); | 90 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized( |
| 97 memcpy(data, m_impl->characters8(), m_impl->length() * sizeof(LChar)); | 91 m_impl->length() + string.length(), data); |
| 98 memcpy(data + m_impl->length(), string.characters8(), string.length() *
sizeof(LChar)); | 92 memcpy(data, m_impl->characters8(), m_impl->length() * sizeof(LChar)); |
| 99 m_impl = newImpl.release(); | 93 memcpy(data + m_impl->length(), string.characters8(), |
| 100 return; | 94 string.length() * sizeof(LChar)); |
| 101 } | |
| 102 | |
| 103 UChar* data; | |
| 104 RELEASE_ASSERT(string.length() <= std::numeric_limits<unsigned>::max() - m_i
mpl->length()); | |
| 105 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length(
) + string.length(), data); | |
| 106 | |
| 107 if (m_impl->is8Bit()) | |
| 108 StringImpl::copyChars(data, m_impl->characters8(), m_impl->length()); | |
| 109 else | |
| 110 StringImpl::copyChars(data, m_impl->characters16(), m_impl->length()); | |
| 111 | |
| 112 if (string.impl()->is8Bit()) | |
| 113 StringImpl::copyChars(data + m_impl->length(), string.impl()->characters
8(), string.impl()->length()); | |
| 114 else | |
| 115 StringImpl::copyChars(data + m_impl->length(), string.impl()->characters
16(), string.impl()->length()); | |
| 116 | |
| 117 m_impl = newImpl.release(); | 95 m_impl = newImpl.release(); |
| 96 return; |
| 97 } |
| 98 |
| 99 UChar* data; |
| 100 RELEASE_ASSERT(string.length() <= |
| 101 std::numeric_limits<unsigned>::max() - m_impl->length()); |
| 102 RefPtr<StringImpl> newImpl = |
| 103 StringImpl::createUninitialized(m_impl->length() + string.length(), data); |
| 104 |
| 105 if (m_impl->is8Bit()) |
| 106 StringImpl::copyChars(data, m_impl->characters8(), m_impl->length()); |
| 107 else |
| 108 StringImpl::copyChars(data, m_impl->characters16(), m_impl->length()); |
| 109 |
| 110 if (string.impl()->is8Bit()) |
| 111 StringImpl::copyChars(data + m_impl->length(), string.impl()->characters8(), |
| 112 string.impl()->length()); |
| 113 else |
| 114 StringImpl::copyChars(data + m_impl->length(), |
| 115 string.impl()->characters16(), |
| 116 string.impl()->length()); |
| 117 |
| 118 m_impl = newImpl.release(); |
| 118 } | 119 } |
| 119 | 120 |
| 120 template <typename CharacterType> | 121 template <typename CharacterType> |
| 121 inline void String::appendInternal(CharacterType c) | 122 inline void String::appendInternal(CharacterType c) { |
| 122 { | 123 // FIXME: This is extremely inefficient. So much so that we might want to |
| 123 // FIXME: This is extremely inefficient. So much so that we might want to | 124 // take this out of String's API. We can make it better by optimizing the |
| 124 // take this out of String's API. We can make it better by optimizing the | 125 // case where exactly one String is pointing at this StringImpl, but even |
| 125 // case where exactly one String is pointing at this StringImpl, but even | 126 // then it's going to require a call into the allocator every single time. |
| 126 // then it's going to require a call into the allocator every single time. | 127 if (!m_impl) { |
| 127 if (!m_impl) { | 128 m_impl = StringImpl::create(&c, 1); |
| 128 m_impl = StringImpl::create(&c, 1); | 129 return; |
| 129 return; | 130 } |
| 130 } | 131 |
| 131 | 132 // FIXME: We should be able to create an 8 bit string via this code path. |
| 132 // FIXME: We should be able to create an 8 bit string via this code path. | 133 UChar* data; |
| 133 UChar* data; | 134 RELEASE_ASSERT(m_impl->length() < std::numeric_limits<unsigned>::max()); |
| 134 RELEASE_ASSERT(m_impl->length() < std::numeric_limits<unsigned>::max()); | 135 RefPtr<StringImpl> newImpl = |
| 135 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length(
) + 1, data); | 136 StringImpl::createUninitialized(m_impl->length() + 1, data); |
| 136 if (m_impl->is8Bit()) | 137 if (m_impl->is8Bit()) |
| 137 StringImpl::copyChars(data, m_impl->characters8(), m_impl->length()); | 138 StringImpl::copyChars(data, m_impl->characters8(), m_impl->length()); |
| 138 else | 139 else |
| 139 StringImpl::copyChars(data, m_impl->characters16(), m_impl->length()); | 140 StringImpl::copyChars(data, m_impl->characters16(), m_impl->length()); |
| 140 data[m_impl->length()] = c; | 141 data[m_impl->length()] = c; |
| 141 m_impl = newImpl.release(); | 142 m_impl = newImpl.release(); |
| 142 } | 143 } |
| 143 | 144 |
| 144 void String::append(LChar c) | 145 void String::append(LChar c) { |
| 145 { | 146 appendInternal(c); |
| 146 appendInternal(c); | 147 } |
| 147 } | 148 |
| 148 | 149 void String::append(UChar c) { |
| 149 void String::append(UChar c) | 150 appendInternal(c); |
| 150 { | 151 } |
| 151 appendInternal(c); | 152 |
| 152 } | 153 int codePointCompare(const String& a, const String& b) { |
| 153 | 154 return codePointCompare(a.impl(), b.impl()); |
| 154 int codePointCompare(const String& a, const String& b) | 155 } |
| 155 { | 156 |
| 156 return codePointCompare(a.impl(), b.impl()); | 157 void String::insert(const String& string, unsigned position) { |
| 157 } | 158 if (string.isEmpty()) { |
| 158 | 159 if (string.isNull()) |
| 159 void String::insert(const String& string, unsigned position) | 160 return; |
| 160 { | 161 if (isNull()) |
| 161 if (string.isEmpty()) { | 162 m_impl = string.impl(); |
| 162 if (string.isNull()) | 163 return; |
| 163 return; | 164 } |
| 164 if (isNull()) | 165 |
| 165 m_impl = string.impl(); | 166 if (string.is8Bit()) |
| 166 return; | 167 insert(string.impl()->characters8(), string.length(), position); |
| 167 } | 168 else |
| 168 | 169 insert(string.impl()->characters16(), string.length(), position); |
| 169 if (string.is8Bit()) | 170 } |
| 170 insert(string.impl()->characters8(), string.length(), position); | 171 |
| 171 else | 172 void String::append(const LChar* charactersToAppend, unsigned lengthToAppend) { |
| 172 insert(string.impl()->characters16(), string.length(), position); | 173 if (!m_impl) { |
| 173 } | 174 if (!charactersToAppend) |
| 174 | 175 return; |
| 175 void String::append(const LChar* charactersToAppend, unsigned lengthToAppend) | 176 m_impl = StringImpl::create(charactersToAppend, lengthToAppend); |
| 176 { | 177 return; |
| 177 if (!m_impl) { | 178 } |
| 178 if (!charactersToAppend) | 179 |
| 179 return; | 180 if (!lengthToAppend) |
| 180 m_impl = StringImpl::create(charactersToAppend, lengthToAppend); | 181 return; |
| 181 return; | 182 |
| 182 } | 183 ASSERT(charactersToAppend); |
| 183 | 184 |
| 184 if (!lengthToAppend) | 185 unsigned strLength = m_impl->length(); |
| 185 return; | 186 |
| 186 | 187 if (m_impl->is8Bit()) { |
| 187 ASSERT(charactersToAppend); | 188 RELEASE_ASSERT(lengthToAppend <= |
| 188 | 189 std::numeric_limits<unsigned>::max() - strLength); |
| 189 unsigned strLength = m_impl->length(); | 190 LChar* data; |
| 190 | 191 RefPtr<StringImpl> newImpl = |
| 191 if (m_impl->is8Bit()) { | 192 StringImpl::createUninitialized(strLength + lengthToAppend, data); |
| 192 RELEASE_ASSERT(lengthToAppend <= std::numeric_limits<unsigned>::max() -
strLength); | 193 StringImpl::copyChars(data, m_impl->characters8(), strLength); |
| 193 LChar* data; | |
| 194 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength +
lengthToAppend, data); | |
| 195 StringImpl::copyChars(data, m_impl->characters8(), strLength); | |
| 196 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppe
nd); | |
| 197 m_impl = newImpl.release(); | |
| 198 return; | |
| 199 } | |
| 200 | |
| 201 RELEASE_ASSERT(lengthToAppend <= std::numeric_limits<unsigned>::max() - strL
ength); | |
| 202 UChar* data; | |
| 203 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + leng
thToAppend, data); | |
| 204 StringImpl::copyChars(data, m_impl->characters16(), strLength); | |
| 205 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); | 194 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); |
| 206 m_impl = newImpl.release(); | 195 m_impl = newImpl.release(); |
| 207 } | 196 return; |
| 208 | 197 } |
| 209 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) | 198 |
| 210 { | 199 RELEASE_ASSERT(lengthToAppend <= |
| 211 if (!m_impl) { | 200 std::numeric_limits<unsigned>::max() - strLength); |
| 212 if (!charactersToAppend) | 201 UChar* data; |
| 213 return; | 202 RefPtr<StringImpl> newImpl = |
| 214 m_impl = StringImpl::create(charactersToAppend, lengthToAppend); | 203 StringImpl::createUninitialized(length() + lengthToAppend, data); |
| 215 return; | 204 StringImpl::copyChars(data, m_impl->characters16(), strLength); |
| 216 } | 205 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); |
| 217 | 206 m_impl = newImpl.release(); |
| 218 if (!lengthToAppend) | 207 } |
| 219 return; | 208 |
| 220 | 209 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) { |
| 221 unsigned strLength = m_impl->length(); | 210 if (!m_impl) { |
| 222 | 211 if (!charactersToAppend) |
| 223 ASSERT(charactersToAppend); | 212 return; |
| 224 RELEASE_ASSERT(lengthToAppend <= std::numeric_limits<unsigned>::max() - strL
ength); | 213 m_impl = StringImpl::create(charactersToAppend, lengthToAppend); |
| 214 return; |
| 215 } |
| 216 |
| 217 if (!lengthToAppend) |
| 218 return; |
| 219 |
| 220 unsigned strLength = m_impl->length(); |
| 221 |
| 222 ASSERT(charactersToAppend); |
| 223 RELEASE_ASSERT(lengthToAppend <= |
| 224 std::numeric_limits<unsigned>::max() - strLength); |
| 225 UChar* data; |
| 226 RefPtr<StringImpl> newImpl = |
| 227 StringImpl::createUninitialized(strLength + lengthToAppend, data); |
| 228 if (m_impl->is8Bit()) |
| 229 StringImpl::copyChars(data, characters8(), strLength); |
| 230 else |
| 231 StringImpl::copyChars(data, characters16(), strLength); |
| 232 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); |
| 233 m_impl = newImpl.release(); |
| 234 } |
| 235 |
| 236 template <typename CharType> |
| 237 PassRefPtr<StringImpl> insertInternal(PassRefPtr<StringImpl> impl, |
| 238 const CharType* charactersToInsert, |
| 239 unsigned lengthToInsert, |
| 240 unsigned position) { |
| 241 if (!lengthToInsert) |
| 242 return impl; |
| 243 |
| 244 ASSERT(charactersToInsert); |
| 245 UChar* data; // FIXME: We should be able to create an 8 bit string here. |
| 246 RELEASE_ASSERT(lengthToInsert <= |
| 247 std::numeric_limits<unsigned>::max() - impl->length()); |
| 248 RefPtr<StringImpl> newImpl = |
| 249 StringImpl::createUninitialized(impl->length() + lengthToInsert, data); |
| 250 |
| 251 if (impl->is8Bit()) |
| 252 StringImpl::copyChars(data, impl->characters8(), position); |
| 253 else |
| 254 StringImpl::copyChars(data, impl->characters16(), position); |
| 255 |
| 256 StringImpl::copyChars(data + position, charactersToInsert, lengthToInsert); |
| 257 |
| 258 if (impl->is8Bit()) |
| 259 StringImpl::copyChars(data + position + lengthToInsert, |
| 260 impl->characters8() + position, |
| 261 impl->length() - position); |
| 262 else |
| 263 StringImpl::copyChars(data + position + lengthToInsert, |
| 264 impl->characters16() + position, |
| 265 impl->length() - position); |
| 266 |
| 267 return newImpl.release(); |
| 268 } |
| 269 |
| 270 void String::insert(const UChar* charactersToInsert, |
| 271 unsigned lengthToInsert, |
| 272 unsigned position) { |
| 273 if (position >= length()) { |
| 274 append(charactersToInsert, lengthToInsert); |
| 275 return; |
| 276 } |
| 277 ASSERT(m_impl); |
| 278 m_impl = insertInternal(m_impl.release(), charactersToInsert, lengthToInsert, |
| 279 position); |
| 280 } |
| 281 |
| 282 void String::insert(const LChar* charactersToInsert, |
| 283 unsigned lengthToInsert, |
| 284 unsigned position) { |
| 285 if (position >= length()) { |
| 286 append(charactersToInsert, lengthToInsert); |
| 287 return; |
| 288 } |
| 289 ASSERT(m_impl); |
| 290 m_impl = insertInternal(m_impl.release(), charactersToInsert, lengthToInsert, |
| 291 position); |
| 292 } |
| 293 |
| 294 UChar32 String::characterStartingAt(unsigned i) const { |
| 295 if (!m_impl || i >= m_impl->length()) |
| 296 return 0; |
| 297 return m_impl->characterStartingAt(i); |
| 298 } |
| 299 |
| 300 void String::ensure16Bit() { |
| 301 unsigned length = this->length(); |
| 302 if (!length || !is8Bit()) |
| 303 return; |
| 304 m_impl = make16BitFrom8BitSource(m_impl->characters8(), length).impl(); |
| 305 } |
| 306 |
| 307 void String::truncate(unsigned position) { |
| 308 if (position >= length()) |
| 309 return; |
| 310 if (m_impl->is8Bit()) { |
| 311 LChar* data; |
| 312 RefPtr<StringImpl> newImpl = |
| 313 StringImpl::createUninitialized(position, data); |
| 314 memcpy(data, m_impl->characters8(), position * sizeof(LChar)); |
| 315 m_impl = newImpl.release(); |
| 316 } else { |
| 225 UChar* data; | 317 UChar* data; |
| 226 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + len
gthToAppend, data); | 318 RefPtr<StringImpl> newImpl = |
| 227 if (m_impl->is8Bit()) | 319 StringImpl::createUninitialized(position, data); |
| 228 StringImpl::copyChars(data, characters8(), strLength); | 320 memcpy(data, m_impl->characters16(), position * sizeof(UChar)); |
| 229 else | |
| 230 StringImpl::copyChars(data, characters16(), strLength); | |
| 231 StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); | |
| 232 m_impl = newImpl.release(); | 321 m_impl = newImpl.release(); |
| 233 } | 322 } |
| 234 | |
| 235 template<typename CharType> | |
| 236 PassRefPtr<StringImpl> insertInternal(PassRefPtr<StringImpl> impl, const CharTyp
e* charactersToInsert, unsigned lengthToInsert, unsigned position) | |
| 237 { | |
| 238 if (!lengthToInsert) | |
| 239 return impl; | |
| 240 | |
| 241 ASSERT(charactersToInsert); | |
| 242 UChar* data; // FIXME: We should be able to create an 8 bit string here. | |
| 243 RELEASE_ASSERT(lengthToInsert <= std::numeric_limits<unsigned>::max() - impl
->length()); | |
| 244 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(impl->length()
+ lengthToInsert, data); | |
| 245 | |
| 246 if (impl->is8Bit()) | |
| 247 StringImpl::copyChars(data, impl->characters8(), position); | |
| 248 else | |
| 249 StringImpl::copyChars(data, impl->characters16(), position); | |
| 250 | |
| 251 StringImpl::copyChars(data + position, charactersToInsert, lengthToInsert); | |
| 252 | |
| 253 if (impl->is8Bit()) | |
| 254 StringImpl::copyChars(data + position + lengthToInsert, impl->characters
8() + position, impl->length() - position); | |
| 255 else | |
| 256 StringImpl::copyChars(data + position + lengthToInsert, impl->characters
16() + position, impl->length() - position); | |
| 257 | |
| 258 return newImpl.release(); | |
| 259 } | |
| 260 | |
| 261 void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, un
signed position) | |
| 262 { | |
| 263 if (position >= length()) { | |
| 264 append(charactersToInsert, lengthToInsert); | |
| 265 return; | |
| 266 } | |
| 267 ASSERT(m_impl); | |
| 268 m_impl = insertInternal(m_impl.release(), charactersToInsert, lengthToInsert
, position); | |
| 269 } | |
| 270 | |
| 271 void String::insert(const LChar* charactersToInsert, unsigned lengthToInsert, un
signed position) | |
| 272 { | |
| 273 if (position >= length()) { | |
| 274 append(charactersToInsert, lengthToInsert); | |
| 275 return; | |
| 276 } | |
| 277 ASSERT(m_impl); | |
| 278 m_impl = insertInternal(m_impl.release(), charactersToInsert, lengthToInsert
, position); | |
| 279 } | |
| 280 | |
| 281 UChar32 String::characterStartingAt(unsigned i) const | |
| 282 { | |
| 283 if (!m_impl || i >= m_impl->length()) | |
| 284 return 0; | |
| 285 return m_impl->characterStartingAt(i); | |
| 286 } | |
| 287 | |
| 288 void String::ensure16Bit() | |
| 289 { | |
| 290 unsigned length = this->length(); | |
| 291 if (!length || !is8Bit()) | |
| 292 return; | |
| 293 m_impl = make16BitFrom8BitSource(m_impl->characters8(), length).impl(); | |
| 294 } | |
| 295 | |
| 296 void String::truncate(unsigned position) | |
| 297 { | |
| 298 if (position >= length()) | |
| 299 return; | |
| 300 if (m_impl->is8Bit()) { | |
| 301 LChar* data; | |
| 302 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, d
ata); | |
| 303 memcpy(data, m_impl->characters8(), position * sizeof(LChar)); | |
| 304 m_impl = newImpl.release(); | |
| 305 } else { | |
| 306 UChar* data; | |
| 307 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, d
ata); | |
| 308 memcpy(data, m_impl->characters16(), position * sizeof(UChar)); | |
| 309 m_impl = newImpl.release(); | |
| 310 } | |
| 311 } | 323 } |
| 312 | 324 |
| 313 template <typename CharacterType> | 325 template <typename CharacterType> |
| 314 inline void String::removeInternal(const CharacterType* characters, unsigned pos
ition, int lengthToRemove) | 326 inline void String::removeInternal(const CharacterType* characters, |
| 315 { | 327 unsigned position, |
| 316 CharacterType* data; | 328 int lengthToRemove) { |
| 317 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - leng
thToRemove, data); | 329 CharacterType* data; |
| 318 memcpy(data, characters, position * sizeof(CharacterType)); | 330 RefPtr<StringImpl> newImpl = |
| 319 memcpy(data + position, characters + position + lengthToRemove, | 331 StringImpl::createUninitialized(length() - lengthToRemove, data); |
| 320 (length() - lengthToRemove - position) * sizeof(CharacterType)); | 332 memcpy(data, characters, position * sizeof(CharacterType)); |
| 321 | 333 memcpy(data + position, characters + position + lengthToRemove, |
| 322 m_impl = newImpl.release(); | 334 (length() - lengthToRemove - position) * sizeof(CharacterType)); |
| 323 } | 335 |
| 324 | 336 m_impl = newImpl.release(); |
| 325 void String::remove(unsigned position, int lengthToRemove) | 337 } |
| 326 { | 338 |
| 327 if (lengthToRemove <= 0) | 339 void String::remove(unsigned position, int lengthToRemove) { |
| 328 return; | 340 if (lengthToRemove <= 0) |
| 329 if (position >= length()) | 341 return; |
| 330 return; | 342 if (position >= length()) |
| 331 if (static_cast<unsigned>(lengthToRemove) > length() - position) | 343 return; |
| 332 lengthToRemove = length() - position; | 344 if (static_cast<unsigned>(lengthToRemove) > length() - position) |
| 333 | 345 lengthToRemove = length() - position; |
| 334 if (is8Bit()) { | 346 |
| 335 removeInternal(characters8(), position, lengthToRemove); | 347 if (is8Bit()) { |
| 336 | 348 removeInternal(characters8(), position, lengthToRemove); |
| 337 return; | 349 |
| 338 } | 350 return; |
| 339 | 351 } |
| 340 removeInternal(characters16(), position, lengthToRemove); | 352 |
| 341 } | 353 removeInternal(characters16(), position, lengthToRemove); |
| 342 | 354 } |
| 343 String String::substring(unsigned pos, unsigned len) const | 355 |
| 344 { | 356 String String::substring(unsigned pos, unsigned len) const { |
| 345 if (!m_impl) | 357 if (!m_impl) |
| 346 return String(); | 358 return String(); |
| 347 return m_impl->substring(pos, len); | 359 return m_impl->substring(pos, len); |
| 348 } | 360 } |
| 349 | 361 |
| 350 String String::lower() const | 362 String String::lower() const { |
| 351 { | 363 if (!m_impl) |
| 352 if (!m_impl) | 364 return String(); |
| 353 return String(); | 365 return m_impl->lower(); |
| 354 return m_impl->lower(); | 366 } |
| 355 } | 367 |
| 356 | 368 String String::upper() const { |
| 357 String String::upper() const | 369 if (!m_impl) |
| 358 { | 370 return String(); |
| 359 if (!m_impl) | 371 return m_impl->upper(); |
| 360 return String(); | 372 } |
| 361 return m_impl->upper(); | 373 |
| 362 } | 374 String String::lower(const AtomicString& localeIdentifier) const { |
| 363 | 375 if (!m_impl) |
| 364 String String::lower(const AtomicString& localeIdentifier) const | 376 return String(); |
| 365 { | 377 return m_impl->lower(localeIdentifier); |
| 366 if (!m_impl) | 378 } |
| 367 return String(); | 379 |
| 368 return m_impl->lower(localeIdentifier); | 380 String String::upper(const AtomicString& localeIdentifier) const { |
| 369 } | 381 if (!m_impl) |
| 370 | 382 return String(); |
| 371 String String::upper(const AtomicString& localeIdentifier) const | 383 return m_impl->upper(localeIdentifier); |
| 372 { | 384 } |
| 373 if (!m_impl) | 385 |
| 374 return String(); | 386 String String::stripWhiteSpace() const { |
| 375 return m_impl->upper(localeIdentifier); | 387 if (!m_impl) |
| 376 } | 388 return String(); |
| 377 | 389 return m_impl->stripWhiteSpace(); |
| 378 String String::stripWhiteSpace() const | 390 } |
| 379 { | 391 |
| 380 if (!m_impl) | 392 String String::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const { |
| 381 return String(); | 393 if (!m_impl) |
| 382 return m_impl->stripWhiteSpace(); | 394 return String(); |
| 383 } | 395 return m_impl->stripWhiteSpace(isWhiteSpace); |
| 384 | 396 } |
| 385 String String::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const | 397 |
| 386 { | 398 String String::simplifyWhiteSpace(StripBehavior stripBehavior) const { |
| 387 if (!m_impl) | 399 if (!m_impl) |
| 388 return String(); | 400 return String(); |
| 389 return m_impl->stripWhiteSpace(isWhiteSpace); | 401 return m_impl->simplifyWhiteSpace(stripBehavior); |
| 390 } | 402 } |
| 391 | 403 |
| 392 String String::simplifyWhiteSpace(StripBehavior stripBehavior) const | 404 String String::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, |
| 393 { | 405 StripBehavior stripBehavior) const { |
| 394 if (!m_impl) | 406 if (!m_impl) |
| 395 return String(); | 407 return String(); |
| 396 return m_impl->simplifyWhiteSpace(stripBehavior); | 408 return m_impl->simplifyWhiteSpace(isWhiteSpace, stripBehavior); |
| 397 } | 409 } |
| 398 | 410 |
| 399 String String::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBeh
avior stripBehavior) const | 411 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const { |
| 400 { | 412 if (!m_impl) |
| 401 if (!m_impl) | 413 return String(); |
| 402 return String(); | 414 return m_impl->removeCharacters(findMatch); |
| 403 return m_impl->simplifyWhiteSpace(isWhiteSpace, stripBehavior); | 415 } |
| 404 } | 416 |
| 405 | 417 String String::foldCase() const { |
| 406 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const | 418 if (!m_impl) |
| 407 { | 419 return String(); |
| 408 if (!m_impl) | 420 return m_impl->foldCase(); |
| 409 return String(); | 421 } |
| 410 return m_impl->removeCharacters(findMatch); | 422 |
| 411 } | 423 Vector<UChar> String::charactersWithNullTermination() const { |
| 412 | 424 if (!m_impl) |
| 413 String String::foldCase() const | 425 return Vector<UChar>(); |
| 414 { | 426 |
| 415 if (!m_impl) | 427 Vector<UChar> result; |
| 416 return String(); | 428 result.reserveInitialCapacity(length() + 1); |
| 417 return m_impl->foldCase(); | 429 appendTo(result); |
| 418 } | 430 result.append('\0'); |
| 419 | 431 return result; |
| 420 Vector<UChar> String::charactersWithNullTermination() const | 432 } |
| 421 { | 433 |
| 422 if (!m_impl) | 434 unsigned String::copyTo(UChar* buffer, unsigned pos, unsigned maxLength) const { |
| 423 return Vector<UChar>(); | 435 unsigned length = this->length(); |
| 424 | 436 RELEASE_ASSERT(pos <= length); |
| 425 Vector<UChar> result; | 437 unsigned numCharacters = std::min(length - pos, maxLength); |
| 426 result.reserveInitialCapacity(length() + 1); | 438 if (!numCharacters) |
| 427 appendTo(result); | 439 return 0; |
| 428 result.append('\0'); | 440 if (is8Bit()) |
| 429 return result; | 441 StringImpl::copyChars(buffer, characters8() + pos, numCharacters); |
| 430 } | 442 else |
| 431 | 443 StringImpl::copyChars(buffer, characters16() + pos, numCharacters); |
| 432 unsigned String::copyTo(UChar* buffer, unsigned pos, unsigned maxLength) const | 444 return numCharacters; |
| 433 { | 445 } |
| 434 unsigned length = this->length(); | 446 |
| 435 RELEASE_ASSERT(pos <= length); | 447 String String::format(const char* format, ...) { |
| 436 unsigned numCharacters = std::min(length - pos, maxLength); | 448 va_list args; |
| 437 if (!numCharacters) | 449 va_start(args, format); |
| 438 return 0; | 450 |
| 439 if (is8Bit()) | 451 // Do the format once to get the length. |
| 440 StringImpl::copyChars(buffer, characters8() + pos, numCharacters); | |
| 441 else | |
| 442 StringImpl::copyChars(buffer, characters16() + pos, numCharacters); | |
| 443 return numCharacters; | |
| 444 } | |
| 445 | |
| 446 String String::format(const char *format, ...) | |
| 447 { | |
| 448 va_list args; | |
| 449 va_start(args, format); | |
| 450 | |
| 451 // Do the format once to get the length. | |
| 452 #if COMPILER(MSVC) | 452 #if COMPILER(MSVC) |
| 453 int result = _vscprintf(format, args); | 453 int result = _vscprintf(format, args); |
| 454 #else | 454 #else |
| 455 char ch; | 455 char ch; |
| 456 int result = vsnprintf(&ch, 1, format, args); | 456 int result = vsnprintf(&ch, 1, format, args); |
| 457 // We need to call va_end() and then va_start() again here, as the | 457 // We need to call va_end() and then va_start() again here, as the |
| 458 // contents of args is undefined after the call to vsnprintf | 458 // contents of args is undefined after the call to vsnprintf |
| 459 // according to http://man.cx/snprintf(3) | 459 // according to http://man.cx/snprintf(3) |
| 460 // | 460 // |
| 461 // Not calling va_end/va_start here happens to work on lots of | 461 // Not calling va_end/va_start here happens to work on lots of |
| 462 // systems, but fails e.g. on 64bit Linux. | 462 // systems, but fails e.g. on 64bit Linux. |
| 463 #endif | 463 #endif |
| 464 va_end(args); | 464 va_end(args); |
| 465 | 465 |
| 466 if (result == 0) | 466 if (result == 0) |
| 467 return String(""); | 467 return String(""); |
| 468 if (result < 0) | 468 if (result < 0) |
| 469 return String(); | 469 return String(); |
| 470 | 470 |
| 471 Vector<char, 256> buffer; | 471 Vector<char, 256> buffer; |
| 472 unsigned len = result; | 472 unsigned len = result; |
| 473 buffer.grow(len + 1); | 473 buffer.grow(len + 1); |
| 474 | 474 |
| 475 va_start(args, format); | 475 va_start(args, format); |
| 476 // Now do the formatting again, guaranteed to fit. | 476 // Now do the formatting again, guaranteed to fit. |
| 477 vsnprintf(buffer.data(), buffer.size(), format, args); | 477 vsnprintf(buffer.data(), buffer.size(), format, args); |
| 478 | 478 |
| 479 va_end(args); | 479 va_end(args); |
| 480 | 480 |
| 481 return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len
); | 481 return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len); |
| 482 } | 482 } |
| 483 | 483 |
| 484 String String::number(int number) | 484 String String::number(int number) { |
| 485 { | 485 return numberToStringSigned<String>(number); |
| 486 return numberToStringSigned<String>(number); | 486 } |
| 487 } | 487 |
| 488 | 488 String String::number(unsigned number) { |
| 489 String String::number(unsigned number) | 489 return numberToStringUnsigned<String>(number); |
| 490 { | 490 } |
| 491 return numberToStringUnsigned<String>(number); | 491 |
| 492 } | 492 String String::number(long number) { |
| 493 | 493 return numberToStringSigned<String>(number); |
| 494 String String::number(long number) | 494 } |
| 495 { | 495 |
| 496 return numberToStringSigned<String>(number); | 496 String String::number(unsigned long number) { |
| 497 } | 497 return numberToStringUnsigned<String>(number); |
| 498 | 498 } |
| 499 String String::number(unsigned long number) | 499 |
| 500 { | 500 String String::number(long long number) { |
| 501 return numberToStringUnsigned<String>(number); | 501 return numberToStringSigned<String>(number); |
| 502 } | 502 } |
| 503 | 503 |
| 504 String String::number(long long number) | 504 String String::number(unsigned long long number) { |
| 505 { | 505 return numberToStringUnsigned<String>(number); |
| 506 return numberToStringSigned<String>(number); | 506 } |
| 507 } | 507 |
| 508 | 508 String String::number( |
| 509 String String::number(unsigned long long number) | 509 double number, |
| 510 { | 510 unsigned precision, |
| 511 return numberToStringUnsigned<String>(number); | 511 TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy) { |
| 512 } | 512 NumberToStringBuffer buffer; |
| 513 | 513 return String(numberToFixedPrecisionString( |
| 514 String String::number(double number, unsigned precision, TrailingZerosTruncating
Policy trailingZerosTruncatingPolicy) | 514 number, precision, buffer, |
| 515 { | 515 trailingZerosTruncatingPolicy == TruncateTrailingZeros)); |
| 516 NumberToStringBuffer buffer; | 516 } |
| 517 return String(numberToFixedPrecisionString(number, precision, buffer, traili
ngZerosTruncatingPolicy == TruncateTrailingZeros)); | 517 |
| 518 } | 518 String String::numberToStringECMAScript(double number) { |
| 519 | 519 NumberToStringBuffer buffer; |
| 520 String String::numberToStringECMAScript(double number) | 520 return String(numberToString(number, buffer)); |
| 521 { | 521 } |
| 522 NumberToStringBuffer buffer; | 522 |
| 523 return String(numberToString(number, buffer)); | 523 String String::numberToStringFixedWidth(double number, unsigned decimalPlaces) { |
| 524 } | 524 NumberToStringBuffer buffer; |
| 525 | 525 return String(numberToFixedWidthString(number, decimalPlaces, buffer)); |
| 526 String String::numberToStringFixedWidth(double number, unsigned decimalPlaces) | 526 } |
| 527 { | 527 |
| 528 NumberToStringBuffer buffer; | 528 int String::toIntStrict(bool* ok, int base) const { |
| 529 return String(numberToFixedWidthString(number, decimalPlaces, buffer)); | 529 if (!m_impl) { |
| 530 } | 530 if (ok) |
| 531 | 531 *ok = false; |
| 532 int String::toIntStrict(bool* ok, int base) const | 532 return 0; |
| 533 { | 533 } |
| 534 if (!m_impl) { | 534 return m_impl->toIntStrict(ok, base); |
| 535 if (ok) | 535 } |
| 536 *ok = false; | 536 |
| 537 return 0; | 537 unsigned String::toUIntStrict(bool* ok, int base) const { |
| 538 } | 538 if (!m_impl) { |
| 539 return m_impl->toIntStrict(ok, base); | 539 if (ok) |
| 540 } | 540 *ok = false; |
| 541 | 541 return 0; |
| 542 unsigned String::toUIntStrict(bool* ok, int base) const | 542 } |
| 543 { | 543 return m_impl->toUIntStrict(ok, base); |
| 544 if (!m_impl) { | 544 } |
| 545 if (ok) | 545 |
| 546 *ok = false; | 546 int64_t String::toInt64Strict(bool* ok, int base) const { |
| 547 return 0; | 547 if (!m_impl) { |
| 548 } | 548 if (ok) |
| 549 return m_impl->toUIntStrict(ok, base); | 549 *ok = false; |
| 550 } | 550 return 0; |
| 551 | 551 } |
| 552 int64_t String::toInt64Strict(bool* ok, int base) const | 552 return m_impl->toInt64Strict(ok, base); |
| 553 { | 553 } |
| 554 if (!m_impl) { | 554 |
| 555 if (ok) | 555 uint64_t String::toUInt64Strict(bool* ok, int base) const { |
| 556 *ok = false; | 556 if (!m_impl) { |
| 557 return 0; | 557 if (ok) |
| 558 } | 558 *ok = false; |
| 559 return m_impl->toInt64Strict(ok, base); | 559 return 0; |
| 560 } | 560 } |
| 561 | 561 return m_impl->toUInt64Strict(ok, base); |
| 562 uint64_t String::toUInt64Strict(bool* ok, int base) const | 562 } |
| 563 { | 563 |
| 564 if (!m_impl) { | 564 int String::toInt(bool* ok) const { |
| 565 if (ok) | 565 if (!m_impl) { |
| 566 *ok = false; | 566 if (ok) |
| 567 return 0; | 567 *ok = false; |
| 568 } | 568 return 0; |
| 569 return m_impl->toUInt64Strict(ok, base); | 569 } |
| 570 } | 570 return m_impl->toInt(ok); |
| 571 | 571 } |
| 572 int String::toInt(bool* ok) const | 572 |
| 573 { | 573 unsigned String::toUInt(bool* ok) const { |
| 574 if (!m_impl) { | 574 if (!m_impl) { |
| 575 if (ok) | 575 if (ok) |
| 576 *ok = false; | 576 *ok = false; |
| 577 return 0; | 577 return 0; |
| 578 } | 578 } |
| 579 return m_impl->toInt(ok); | 579 return m_impl->toUInt(ok); |
| 580 } | 580 } |
| 581 | 581 |
| 582 unsigned String::toUInt(bool* ok) const | 582 int64_t String::toInt64(bool* ok) const { |
| 583 { | 583 if (!m_impl) { |
| 584 if (!m_impl) { | 584 if (ok) |
| 585 if (ok) | 585 *ok = false; |
| 586 *ok = false; | 586 return 0; |
| 587 return 0; | 587 } |
| 588 } | 588 return m_impl->toInt64(ok); |
| 589 return m_impl->toUInt(ok); | 589 } |
| 590 } | 590 |
| 591 | 591 uint64_t String::toUInt64(bool* ok) const { |
| 592 int64_t String::toInt64(bool* ok) const | 592 if (!m_impl) { |
| 593 { | 593 if (ok) |
| 594 if (!m_impl) { | 594 *ok = false; |
| 595 if (ok) | 595 return 0; |
| 596 *ok = false; | 596 } |
| 597 return 0; | 597 return m_impl->toUInt64(ok); |
| 598 } | 598 } |
| 599 return m_impl->toInt64(ok); | 599 |
| 600 } | 600 double String::toDouble(bool* ok) const { |
| 601 | 601 if (!m_impl) { |
| 602 uint64_t String::toUInt64(bool* ok) const | 602 if (ok) |
| 603 { | 603 *ok = false; |
| 604 if (!m_impl) { | 604 return 0.0; |
| 605 if (ok) | 605 } |
| 606 *ok = false; | 606 return m_impl->toDouble(ok); |
| 607 return 0; | 607 } |
| 608 } | 608 |
| 609 return m_impl->toUInt64(ok); | 609 float String::toFloat(bool* ok) const { |
| 610 } | 610 if (!m_impl) { |
| 611 | 611 if (ok) |
| 612 double String::toDouble(bool* ok) const | 612 *ok = false; |
| 613 { | 613 return 0.0f; |
| 614 if (!m_impl) { | 614 } |
| 615 if (ok) | 615 return m_impl->toFloat(ok); |
| 616 *ok = false; | 616 } |
| 617 return 0.0; | 617 |
| 618 } | 618 String String::isolatedCopy() const { |
| 619 return m_impl->toDouble(ok); | 619 if (!m_impl) |
| 620 } | 620 return String(); |
| 621 | 621 return m_impl->isolatedCopy(); |
| 622 float String::toFloat(bool* ok) const | 622 } |
| 623 { | 623 |
| 624 if (!m_impl) { | 624 bool String::isSafeToSendToAnotherThread() const { |
| 625 if (ok) | 625 if (!impl()) |
| 626 *ok = false; | 626 return true; |
| 627 return 0.0f; | 627 if (impl()->isStatic()) |
| 628 } | 628 return true; |
| 629 return m_impl->toFloat(ok); | 629 // AtomicStrings are not safe to send between threads as ~StringImpl() |
| 630 } | 630 // will try to remove them from the wrong AtomicStringTable. |
| 631 | 631 if (impl()->isAtomic()) |
| 632 String String::isolatedCopy() const | |
| 633 { | |
| 634 if (!m_impl) | |
| 635 return String(); | |
| 636 return m_impl->isolatedCopy(); | |
| 637 } | |
| 638 | |
| 639 bool String::isSafeToSendToAnotherThread() const | |
| 640 { | |
| 641 if (!impl()) | |
| 642 return true; | |
| 643 if (impl()->isStatic()) | |
| 644 return true; | |
| 645 // AtomicStrings are not safe to send between threads as ~StringImpl() | |
| 646 // will try to remove them from the wrong AtomicStringTable. | |
| 647 if (impl()->isAtomic()) | |
| 648 return false; | |
| 649 if (impl()->hasOneRef()) | |
| 650 return true; | |
| 651 return false; | 632 return false; |
| 652 } | 633 if (impl()->hasOneRef()) |
| 653 | 634 return true; |
| 654 void String::split(const String& separator, bool allowEmptyEntries, Vector<Strin
g>& result) const | 635 return false; |
| 655 { | 636 } |
| 656 result.clear(); | 637 |
| 657 | 638 void String::split(const String& separator, |
| 658 unsigned startPos = 0; | 639 bool allowEmptyEntries, |
| 659 size_t endPos; | 640 Vector<String>& result) const { |
| 660 while ((endPos = find(separator, startPos)) != kNotFound) { | 641 result.clear(); |
| 661 if (allowEmptyEntries || startPos != endPos) | 642 |
| 662 result.append(substring(startPos, endPos - startPos)); | 643 unsigned startPos = 0; |
| 663 startPos = endPos + separator.length(); | 644 size_t endPos; |
| 664 } | 645 while ((endPos = find(separator, startPos)) != kNotFound) { |
| 665 if (allowEmptyEntries || startPos != length()) | 646 if (allowEmptyEntries || startPos != endPos) |
| 666 result.append(substring(startPos)); | 647 result.append(substring(startPos, endPos - startPos)); |
| 667 } | 648 startPos = endPos + separator.length(); |
| 668 | 649 } |
| 669 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& resu
lt) const | 650 if (allowEmptyEntries || startPos != length()) |
| 670 { | 651 result.append(substring(startPos)); |
| 671 result.clear(); | 652 } |
| 672 | 653 |
| 673 unsigned startPos = 0; | 654 void String::split(UChar separator, |
| 674 size_t endPos; | 655 bool allowEmptyEntries, |
| 675 while ((endPos = find(separator, startPos)) != kNotFound) { | 656 Vector<String>& result) const { |
| 676 if (allowEmptyEntries || startPos != endPos) | 657 result.clear(); |
| 677 result.append(substring(startPos, endPos - startPos)); | 658 |
| 678 startPos = endPos + 1; | 659 unsigned startPos = 0; |
| 679 } | 660 size_t endPos; |
| 680 if (allowEmptyEntries || startPos != length()) | 661 while ((endPos = find(separator, startPos)) != kNotFound) { |
| 681 result.append(substring(startPos)); | 662 if (allowEmptyEntries || startPos != endPos) |
| 682 } | 663 result.append(substring(startPos, endPos - startPos)); |
| 683 | 664 startPos = endPos + 1; |
| 684 CString String::ascii() const | 665 } |
| 685 { | 666 if (allowEmptyEntries || startPos != length()) |
| 686 // Printable ASCII characters 32..127 and the null character are | 667 result.append(substring(startPos)); |
| 687 // preserved, characters outside of this range are converted to '?'. | 668 } |
| 688 | 669 |
| 689 unsigned length = this->length(); | 670 CString String::ascii() const { |
| 690 if (!length) { | 671 // Printable ASCII characters 32..127 and the null character are |
| 691 char* characterBuffer; | 672 // preserved, characters outside of this range are converted to '?'. |
| 692 return CString::newUninitialized(length, characterBuffer); | 673 |
| 693 } | 674 unsigned length = this->length(); |
| 694 | 675 if (!length) { |
| 695 if (this->is8Bit()) { | 676 char* characterBuffer; |
| 696 const LChar* characters = this->characters8(); | 677 return CString::newUninitialized(length, characterBuffer); |
| 697 | 678 } |
| 698 char* characterBuffer; | 679 |
| 699 CString result = CString::newUninitialized(length, characterBuffer); | 680 if (this->is8Bit()) { |
| 700 | 681 const LChar* characters = this->characters8(); |
| 701 for (unsigned i = 0; i < length; ++i) { | |
| 702 LChar ch = characters[i]; | |
| 703 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; | |
| 704 } | |
| 705 | |
| 706 return result; | |
| 707 } | |
| 708 | |
| 709 const UChar* characters = this->characters16(); | |
| 710 | 682 |
| 711 char* characterBuffer; | 683 char* characterBuffer; |
| 712 CString result = CString::newUninitialized(length, characterBuffer); | 684 CString result = CString::newUninitialized(length, characterBuffer); |
| 713 | 685 |
| 714 for (unsigned i = 0; i < length; ++i) { | 686 for (unsigned i = 0; i < length; ++i) { |
| 715 UChar ch = characters[i]; | 687 LChar ch = characters[i]; |
| 716 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : static_cast<
char>(ch); | 688 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; |
| 717 } | 689 } |
| 718 | 690 |
| 719 return result; | 691 return result; |
| 720 } | 692 } |
| 721 | 693 |
| 722 CString String::latin1() const | 694 const UChar* characters = this->characters16(); |
| 723 { | 695 |
| 724 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are | 696 char* characterBuffer; |
| 725 // preserved, characters outside of this range are converted to '?'. | 697 CString result = CString::newUninitialized(length, characterBuffer); |
| 726 | 698 |
| 727 unsigned length = this->length(); | 699 for (unsigned i = 0; i < length; ++i) { |
| 728 | 700 UChar ch = characters[i]; |
| 729 if (!length) | 701 characterBuffer[i] = |
| 730 return CString("", 0); | 702 ch && (ch < 0x20 || ch > 0x7f) ? '?' : static_cast<char>(ch); |
| 731 | 703 } |
| 732 if (is8Bit()) | 704 |
| 733 return CString(reinterpret_cast<const char*>(this->characters8()), lengt
h); | 705 return result; |
| 734 | 706 } |
| 707 |
| 708 CString String::latin1() const { |
| 709 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are |
| 710 // preserved, characters outside of this range are converted to '?'. |
| 711 |
| 712 unsigned length = this->length(); |
| 713 |
| 714 if (!length) |
| 715 return CString("", 0); |
| 716 |
| 717 if (is8Bit()) |
| 718 return CString(reinterpret_cast<const char*>(this->characters8()), length); |
| 719 |
| 720 const UChar* characters = this->characters16(); |
| 721 |
| 722 char* characterBuffer; |
| 723 CString result = CString::newUninitialized(length, characterBuffer); |
| 724 |
| 725 for (unsigned i = 0; i < length; ++i) { |
| 726 UChar ch = characters[i]; |
| 727 characterBuffer[i] = ch > 0xff ? '?' : static_cast<char>(ch); |
| 728 } |
| 729 |
| 730 return result; |
| 731 } |
| 732 |
| 733 // Helper to write a three-byte UTF-8 code point to the buffer, caller must chec
k room is available. |
| 734 static inline void putUTF8Triple(char*& buffer, UChar ch) { |
| 735 ASSERT(ch >= 0x0800); |
| 736 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); |
| 737 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); |
| 738 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); |
| 739 } |
| 740 |
| 741 CString String::utf8(UTF8ConversionMode mode) const { |
| 742 unsigned length = this->length(); |
| 743 |
| 744 if (!length) |
| 745 return CString("", 0); |
| 746 |
| 747 // Allocate a buffer big enough to hold all the characters |
| 748 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). |
| 749 // Optimization ideas, if we find this function is hot: |
| 750 // * We could speculatively create a CStringBuffer to contain 'length' |
| 751 // characters, and resize if necessary (i.e. if the buffer contains |
| 752 // non-ascii characters). (Alternatively, scan the buffer first for |
| 753 // ascii characters, so we know this will be sufficient). |
| 754 // * We could allocate a CStringBuffer with an appropriate size to |
| 755 // have a good chance of being able to write the string into the |
| 756 // buffer without reallocing (say, 1.5 x length). |
| 757 if (length > std::numeric_limits<unsigned>::max() / 3) |
| 758 return CString(); |
| 759 Vector<char, 1024> bufferVector(length * 3); |
| 760 |
| 761 char* buffer = bufferVector.data(); |
| 762 |
| 763 if (is8Bit()) { |
| 764 const LChar* characters = this->characters8(); |
| 765 |
| 766 ConversionResult result = |
| 767 convertLatin1ToUTF8(&characters, characters + length, &buffer, |
| 768 buffer + bufferVector.size()); |
| 769 ASSERT_UNUSED( |
| 770 result, |
| 771 result != |
| 772 targetExhausted); // (length * 3) should be sufficient for any conv
ersion |
| 773 } else { |
| 735 const UChar* characters = this->characters16(); | 774 const UChar* characters = this->characters16(); |
| 736 | 775 |
| 737 char* characterBuffer; | 776 if (mode == StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD) { |
| 738 CString result = CString::newUninitialized(length, characterBuffer); | 777 const UChar* charactersEnd = characters + length; |
| 739 | 778 char* bufferEnd = buffer + bufferVector.size(); |
| 740 for (unsigned i = 0; i < length; ++i) { | 779 while (characters < charactersEnd) { |
| 741 UChar ch = characters[i]; | 780 // Use strict conversion to detect unpaired surrogates. |
| 742 characterBuffer[i] = ch > 0xff ? '?' : static_cast<char>(ch); | 781 ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, |
| 782 &buffer, bufferEnd, true); |
| 783 ASSERT(result != targetExhausted); |
| 784 // Conversion fails when there is an unpaired surrogate. Put |
| 785 // replacement character (U+FFFD) instead of the unpaired |
| 786 // surrogate. |
| 787 if (result != conversionOK) { |
| 788 ASSERT((0xD800 <= *characters && *characters <= 0xDFFF)); |
| 789 // There should be room left, since one UChar hasn't been |
| 790 // converted. |
| 791 ASSERT((buffer + 3) <= bufferEnd); |
| 792 putUTF8Triple(buffer, replacementCharacter); |
| 793 ++characters; |
| 794 } |
| 795 } |
| 796 } else { |
| 797 bool strict = mode == StrictUTF8Conversion; |
| 798 ConversionResult result = |
| 799 convertUTF16ToUTF8(&characters, characters + length, &buffer, |
| 800 buffer + bufferVector.size(), strict); |
| 801 ASSERT( |
| 802 result != |
| 803 targetExhausted); // (length * 3) should be sufficient for any conver
sion |
| 804 |
| 805 // Only produced from strict conversion. |
| 806 if (result == sourceIllegal) { |
| 807 ASSERT(strict); |
| 808 return CString(); |
| 809 } |
| 810 |
| 811 // Check for an unconverted high surrogate. |
| 812 if (result == sourceExhausted) { |
| 813 if (strict) |
| 814 return CString(); |
| 815 // This should be one unpaired high surrogate. Treat it the same |
| 816 // was as an unpaired high surrogate would have been handled in |
| 817 // the middle of a string with non-strict conversion - which is |
| 818 // to say, simply encode it to UTF-8. |
| 819 ASSERT((characters + 1) == (this->characters16() + length)); |
| 820 ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF)); |
| 821 // There should be room left, since one UChar hasn't been |
| 822 // converted. |
| 823 ASSERT((buffer + 3) <= (buffer + bufferVector.size())); |
| 824 putUTF8Triple(buffer, *characters); |
| 825 } |
| 743 } | 826 } |
| 744 | 827 } |
| 745 return result; | 828 |
| 746 } | 829 return CString(bufferVector.data(), buffer - bufferVector.data()); |
| 747 | 830 } |
| 748 // Helper to write a three-byte UTF-8 code point to the buffer, caller must chec
k room is available. | 831 |
| 749 static inline void putUTF8Triple(char*& buffer, UChar ch) | 832 String String::make8BitFrom16BitSource(const UChar* source, size_t length) { |
| 750 { | 833 if (!length) |
| 751 ASSERT(ch >= 0x0800); | 834 return emptyString(); |
| 752 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); | 835 |
| 753 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); | 836 LChar* destination; |
| 754 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); | 837 String result = String::createUninitialized(length, destination); |
| 755 } | 838 |
| 756 | 839 copyLCharsFromUCharSource(destination, source, length); |
| 757 CString String::utf8(UTF8ConversionMode mode) const | 840 |
| 758 { | 841 return result; |
| 759 unsigned length = this->length(); | 842 } |
| 760 | 843 |
| 761 if (!length) | 844 String String::make16BitFrom8BitSource(const LChar* source, size_t length) { |
| 762 return CString("", 0); | 845 if (!length) |
| 763 | 846 return emptyString16Bit(); |
| 764 // Allocate a buffer big enough to hold all the characters | 847 |
| 765 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). | 848 UChar* destination; |
| 766 // Optimization ideas, if we find this function is hot: | 849 String result = String::createUninitialized(length, destination); |
| 767 // * We could speculatively create a CStringBuffer to contain 'length' | 850 |
| 768 // characters, and resize if necessary (i.e. if the buffer contains | 851 StringImpl::copyChars(destination, source, length); |
| 769 // non-ascii characters). (Alternatively, scan the buffer first for | 852 |
| 770 // ascii characters, so we know this will be sufficient). | 853 return result; |
| 771 // * We could allocate a CStringBuffer with an appropriate size to | 854 } |
| 772 // have a good chance of being able to write the string into the | 855 |
| 773 // buffer without reallocing (say, 1.5 x length). | 856 String String::fromUTF8(const LChar* stringStart, size_t length) { |
| 774 if (length > std::numeric_limits<unsigned>::max() / 3) | 857 RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max()); |
| 775 return CString(); | 858 |
| 776 Vector<char, 1024> bufferVector(length * 3); | 859 if (!stringStart) |
| 777 | 860 return String(); |
| 778 char* buffer = bufferVector.data(); | 861 |
| 779 | 862 if (!length) |
| 780 if (is8Bit()) { | 863 return emptyString(); |
| 781 const LChar* characters = this->characters8(); | 864 |
| 782 | 865 if (charactersAreAllASCII(stringStart, length)) |
| 783 ConversionResult result = convertLatin1ToUTF8(&characters, characters +
length, &buffer, buffer + bufferVector.size()); | 866 return StringImpl::create(stringStart, length); |
| 784 ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should
be sufficient for any conversion | 867 |
| 785 } else { | 868 Vector<UChar, 1024> buffer(length); |
| 786 const UChar* characters = this->characters16(); | 869 UChar* bufferStart = buffer.data(); |
| 787 | 870 |
| 788 if (mode == StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD) { | 871 UChar* bufferCurrent = bufferStart; |
| 789 const UChar* charactersEnd = characters + length; | 872 const char* stringCurrent = reinterpret_cast<const char*>(stringStart); |
| 790 char* bufferEnd = buffer + bufferVector.size(); | 873 if (convertUTF8ToUTF16( |
| 791 while (characters < charactersEnd) { | 874 &stringCurrent, reinterpret_cast<const char*>(stringStart + length), |
| 792 // Use strict conversion to detect unpaired surrogates. | 875 &bufferCurrent, bufferCurrent + buffer.size()) != conversionOK) |
| 793 ConversionResult result = convertUTF16ToUTF8(&characters, charac
tersEnd, &buffer, bufferEnd, true); | 876 return String(); |
| 794 ASSERT(result != targetExhausted); | 877 |
| 795 // Conversion fails when there is an unpaired surrogate. Put | 878 unsigned utf16Length = bufferCurrent - bufferStart; |
| 796 // replacement character (U+FFFD) instead of the unpaired | 879 ASSERT(utf16Length < length); |
| 797 // surrogate. | 880 return StringImpl::create(bufferStart, utf16Length); |
| 798 if (result != conversionOK) { | 881 } |
| 799 ASSERT((0xD800 <= *characters && *characters <= 0xDFFF)); | 882 |
| 800 // There should be room left, since one UChar hasn't been | 883 String String::fromUTF8(const LChar* string) { |
| 801 // converted. | 884 if (!string) |
| 802 ASSERT((buffer + 3) <= bufferEnd); | 885 return String(); |
| 803 putUTF8Triple(buffer, replacementCharacter); | 886 return fromUTF8(string, strlen(reinterpret_cast<const char*>(string))); |
| 804 ++characters; | 887 } |
| 805 } | 888 |
| 806 } | 889 String String::fromUTF8(const CString& s) { |
| 807 } else { | 890 return fromUTF8(s.data()); |
| 808 bool strict = mode == StrictUTF8Conversion; | 891 } |
| 809 ConversionResult result = convertUTF16ToUTF8(&characters, characters
+ length, &buffer, buffer + bufferVector.size(), strict); | 892 |
| 810 ASSERT(result != targetExhausted); // (length * 3) should be suffici
ent for any conversion | 893 String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size) { |
| 811 | 894 String utf8 = fromUTF8(string, size); |
| 812 // Only produced from strict conversion. | 895 if (!utf8) |
| 813 if (result == sourceIllegal) { | 896 return String(string, size); |
| 814 ASSERT(strict); | 897 return utf8; |
| 815 return CString(); | |
| 816 } | |
| 817 | |
| 818 // Check for an unconverted high surrogate. | |
| 819 if (result == sourceExhausted) { | |
| 820 if (strict) | |
| 821 return CString(); | |
| 822 // This should be one unpaired high surrogate. Treat it the same | |
| 823 // was as an unpaired high surrogate would have been handled in | |
| 824 // the middle of a string with non-strict conversion - which is | |
| 825 // to say, simply encode it to UTF-8. | |
| 826 ASSERT((characters + 1) == (this->characters16() + length)); | |
| 827 ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF)); | |
| 828 // There should be room left, since one UChar hasn't been | |
| 829 // converted. | |
| 830 ASSERT((buffer + 3) <= (buffer + bufferVector.size())); | |
| 831 putUTF8Triple(buffer, *characters); | |
| 832 } | |
| 833 } | |
| 834 } | |
| 835 | |
| 836 return CString(bufferVector.data(), buffer - bufferVector.data()); | |
| 837 } | |
| 838 | |
| 839 String String::make8BitFrom16BitSource(const UChar* source, size_t length) | |
| 840 { | |
| 841 if (!length) | |
| 842 return emptyString(); | |
| 843 | |
| 844 LChar* destination; | |
| 845 String result = String::createUninitialized(length, destination); | |
| 846 | |
| 847 copyLCharsFromUCharSource(destination, source, length); | |
| 848 | |
| 849 return result; | |
| 850 } | |
| 851 | |
| 852 String String::make16BitFrom8BitSource(const LChar* source, size_t length) | |
| 853 { | |
| 854 if (!length) | |
| 855 return emptyString16Bit(); | |
| 856 | |
| 857 UChar* destination; | |
| 858 String result = String::createUninitialized(length, destination); | |
| 859 | |
| 860 StringImpl::copyChars(destination, source, length); | |
| 861 | |
| 862 return result; | |
| 863 } | |
| 864 | |
| 865 String String::fromUTF8(const LChar* stringStart, size_t length) | |
| 866 { | |
| 867 RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max()); | |
| 868 | |
| 869 if (!stringStart) | |
| 870 return String(); | |
| 871 | |
| 872 if (!length) | |
| 873 return emptyString(); | |
| 874 | |
| 875 if (charactersAreAllASCII(stringStart, length)) | |
| 876 return StringImpl::create(stringStart, length); | |
| 877 | |
| 878 Vector<UChar, 1024> buffer(length); | |
| 879 UChar* bufferStart = buffer.data(); | |
| 880 | |
| 881 UChar* bufferCurrent = bufferStart; | |
| 882 const char* stringCurrent = reinterpret_cast<const char*>(stringStart); | |
| 883 if (convertUTF8ToUTF16(&stringCurrent, reinterpret_cast<const char *>(string
Start + length), &bufferCurrent, bufferCurrent + buffer.size()) != conversionOK) | |
| 884 return String(); | |
| 885 | |
| 886 unsigned utf16Length = bufferCurrent - bufferStart; | |
| 887 ASSERT(utf16Length < length); | |
| 888 return StringImpl::create(bufferStart, utf16Length); | |
| 889 } | |
| 890 | |
| 891 String String::fromUTF8(const LChar* string) | |
| 892 { | |
| 893 if (!string) | |
| 894 return String(); | |
| 895 return fromUTF8(string, strlen(reinterpret_cast<const char*>(string))); | |
| 896 } | |
| 897 | |
| 898 String String::fromUTF8(const CString& s) | |
| 899 { | |
| 900 return fromUTF8(s.data()); | |
| 901 } | |
| 902 | |
| 903 String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size) | |
| 904 { | |
| 905 String utf8 = fromUTF8(string, size); | |
| 906 if (!utf8) | |
| 907 return String(string, size); | |
| 908 return utf8; | |
| 909 } | 898 } |
| 910 | 899 |
| 911 // String Operations | 900 // String Operations |
| 912 | 901 |
| 913 static bool isCharacterAllowedInBase(UChar c, int base) | 902 static bool isCharacterAllowedInBase(UChar c, int base) { |
| 914 { | 903 if (c > 0x7F) |
| 915 if (c > 0x7F) | 904 return false; |
| 916 return false; | 905 if (isASCIIDigit(c)) |
| 906 return c - '0' < base; |
| 907 if (isASCIIAlpha(c)) { |
| 908 if (base > 36) |
| 909 base = 36; |
| 910 return (c >= 'a' && c < 'a' + base - 10) || |
| 911 (c >= 'A' && c < 'A' + base - 10); |
| 912 } |
| 913 return false; |
| 914 } |
| 915 |
| 916 template <typename IntegralType, typename CharType> |
| 917 static inline IntegralType toIntegralType(const CharType* data, |
| 918 size_t length, |
| 919 bool* ok, |
| 920 int base) { |
| 921 static const IntegralType integralMax = |
| 922 std::numeric_limits<IntegralType>::max(); |
| 923 static const bool isSigned = std::numeric_limits<IntegralType>::is_signed; |
| 924 const IntegralType maxMultiplier = integralMax / base; |
| 925 |
| 926 IntegralType value = 0; |
| 927 bool isOk = false; |
| 928 bool isNegative = false; |
| 929 |
| 930 if (!data) |
| 931 goto bye; |
| 932 |
| 933 // skip leading whitespace |
| 934 while (length && isSpaceOrNewline(*data)) { |
| 935 --length; |
| 936 ++data; |
| 937 } |
| 938 |
| 939 if (isSigned && length && *data == '-') { |
| 940 --length; |
| 941 ++data; |
| 942 isNegative = true; |
| 943 } else if (length && *data == '+') { |
| 944 --length; |
| 945 ++data; |
| 946 } |
| 947 |
| 948 if (!length || !isCharacterAllowedInBase(*data, base)) |
| 949 goto bye; |
| 950 |
| 951 while (length && isCharacterAllowedInBase(*data, base)) { |
| 952 --length; |
| 953 IntegralType digitValue; |
| 954 CharType c = *data; |
| 917 if (isASCIIDigit(c)) | 955 if (isASCIIDigit(c)) |
| 918 return c - '0' < base; | 956 digitValue = c - '0'; |
| 919 if (isASCIIAlpha(c)) { | 957 else if (c >= 'a') |
| 920 if (base > 36) | 958 digitValue = c - 'a' + 10; |
| 921 base = 36; | 959 else |
| 922 return (c >= 'a' && c < 'a' + base - 10) | 960 digitValue = c - 'A' + 10; |
| 923 || (c >= 'A' && c < 'A' + base - 10); | 961 |
| 924 } | 962 if (value > maxMultiplier || |
| 925 return false; | 963 (value == maxMultiplier && |
| 926 } | 964 digitValue > (integralMax % base) + isNegative)) |
| 927 | 965 goto bye; |
| 928 template <typename IntegralType, typename CharType> | 966 |
| 929 static inline IntegralType toIntegralType(const CharType* data, size_t length, b
ool* ok, int base) | 967 value = base * value + digitValue; |
| 930 { | 968 ++data; |
| 931 static const IntegralType integralMax = std::numeric_limits<IntegralType>::m
ax(); | 969 } |
| 932 static const bool isSigned = std::numeric_limits<IntegralType>::is_signed; | |
| 933 const IntegralType maxMultiplier = integralMax / base; | |
| 934 | |
| 935 IntegralType value = 0; | |
| 936 bool isOk = false; | |
| 937 bool isNegative = false; | |
| 938 | |
| 939 if (!data) | |
| 940 goto bye; | |
| 941 | |
| 942 // skip leading whitespace | |
| 943 while (length && isSpaceOrNewline(*data)) { | |
| 944 --length; | |
| 945 ++data; | |
| 946 } | |
| 947 | |
| 948 if (isSigned && length && *data == '-') { | |
| 949 --length; | |
| 950 ++data; | |
| 951 isNegative = true; | |
| 952 } else if (length && *data == '+') { | |
| 953 --length; | |
| 954 ++data; | |
| 955 } | |
| 956 | |
| 957 if (!length || !isCharacterAllowedInBase(*data, base)) | |
| 958 goto bye; | |
| 959 | |
| 960 while (length && isCharacterAllowedInBase(*data, base)) { | |
| 961 --length; | |
| 962 IntegralType digitValue; | |
| 963 CharType c = *data; | |
| 964 if (isASCIIDigit(c)) | |
| 965 digitValue = c - '0'; | |
| 966 else if (c >= 'a') | |
| 967 digitValue = c - 'a' + 10; | |
| 968 else | |
| 969 digitValue = c - 'A' + 10; | |
| 970 | |
| 971 if (value > maxMultiplier || (value == maxMultiplier && digitValue > (in
tegralMax % base) + isNegative)) | |
| 972 goto bye; | |
| 973 | |
| 974 value = base * value + digitValue; | |
| 975 ++data; | |
| 976 } | |
| 977 | 970 |
| 978 #if COMPILER(MSVC) | 971 #if COMPILER(MSVC) |
| 979 #pragma warning(push, 0) | 972 #pragma warning(push, 0) |
| 980 #pragma warning(disable:4146) | 973 #pragma warning(disable : 4146) |
| 981 #endif | 974 #endif |
| 982 | 975 |
| 983 if (isNegative) | 976 if (isNegative) |
| 984 value = -value; | 977 value = -value; |
| 985 | 978 |
| 986 #if COMPILER(MSVC) | 979 #if COMPILER(MSVC) |
| 987 #pragma warning(pop) | 980 #pragma warning(pop) |
| 988 #endif | 981 #endif |
| 989 | 982 |
| 990 // skip trailing space | 983 // skip trailing space |
| 991 while (length && isSpaceOrNewline(*data)) { | 984 while (length && isSpaceOrNewline(*data)) { |
| 992 --length; | 985 --length; |
| 993 ++data; | 986 ++data; |
| 994 } | 987 } |
| 995 | 988 |
| 996 if (!length) | 989 if (!length) |
| 997 isOk = true; | 990 isOk = true; |
| 998 bye: | 991 bye: |
| 992 if (ok) |
| 993 *ok = isOk; |
| 994 return isOk ? value : 0; |
| 995 } |
| 996 |
| 997 template <typename CharType> |
| 998 static unsigned lengthOfCharactersAsInteger(const CharType* data, |
| 999 size_t length) { |
| 1000 size_t i = 0; |
| 1001 |
| 1002 // Allow leading spaces. |
| 1003 for (; i != length; ++i) { |
| 1004 if (!isSpaceOrNewline(data[i])) |
| 1005 break; |
| 1006 } |
| 1007 |
| 1008 // Allow sign. |
| 1009 if (i != length && (data[i] == '+' || data[i] == '-')) |
| 1010 ++i; |
| 1011 |
| 1012 // Allow digits. |
| 1013 for (; i != length; ++i) { |
| 1014 if (!isASCIIDigit(data[i])) |
| 1015 break; |
| 1016 } |
| 1017 |
| 1018 return i; |
| 1019 } |
| 1020 |
| 1021 int charactersToIntStrict(const LChar* data, |
| 1022 size_t length, |
| 1023 bool* ok, |
| 1024 int base) { |
| 1025 return toIntegralType<int, LChar>(data, length, ok, base); |
| 1026 } |
| 1027 |
| 1028 int charactersToIntStrict(const UChar* data, |
| 1029 size_t length, |
| 1030 bool* ok, |
| 1031 int base) { |
| 1032 return toIntegralType<int, UChar>(data, length, ok, base); |
| 1033 } |
| 1034 |
| 1035 unsigned charactersToUIntStrict(const LChar* data, |
| 1036 size_t length, |
| 1037 bool* ok, |
| 1038 int base) { |
| 1039 return toIntegralType<unsigned, LChar>(data, length, ok, base); |
| 1040 } |
| 1041 |
| 1042 unsigned charactersToUIntStrict(const UChar* data, |
| 1043 size_t length, |
| 1044 bool* ok, |
| 1045 int base) { |
| 1046 return toIntegralType<unsigned, UChar>(data, length, ok, base); |
| 1047 } |
| 1048 |
| 1049 int64_t charactersToInt64Strict(const LChar* data, |
| 1050 size_t length, |
| 1051 bool* ok, |
| 1052 int base) { |
| 1053 return toIntegralType<int64_t, LChar>(data, length, ok, base); |
| 1054 } |
| 1055 |
| 1056 int64_t charactersToInt64Strict(const UChar* data, |
| 1057 size_t length, |
| 1058 bool* ok, |
| 1059 int base) { |
| 1060 return toIntegralType<int64_t, UChar>(data, length, ok, base); |
| 1061 } |
| 1062 |
| 1063 uint64_t charactersToUInt64Strict(const LChar* data, |
| 1064 size_t length, |
| 1065 bool* ok, |
| 1066 int base) { |
| 1067 return toIntegralType<uint64_t, LChar>(data, length, ok, base); |
| 1068 } |
| 1069 |
| 1070 uint64_t charactersToUInt64Strict(const UChar* data, |
| 1071 size_t length, |
| 1072 bool* ok, |
| 1073 int base) { |
| 1074 return toIntegralType<uint64_t, UChar>(data, length, ok, base); |
| 1075 } |
| 1076 |
| 1077 int charactersToInt(const LChar* data, size_t length, bool* ok) { |
| 1078 return toIntegralType<int, LChar>( |
| 1079 data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
| 1080 } |
| 1081 |
| 1082 int charactersToInt(const UChar* data, size_t length, bool* ok) { |
| 1083 return toIntegralType<int, UChar>( |
| 1084 data, lengthOfCharactersAsInteger(data, length), ok, 10); |
| 1085 } |
| 1086 |
| 1087 unsigned charactersToUInt(const LChar* data, size_t length, bool* ok) { |
| 1088 return toIntegralType<unsigned, LChar>( |
| 1089 data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
| 1090 } |
| 1091 |
| 1092 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok) { |
| 1093 return toIntegralType<unsigned, UChar>( |
| 1094 data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
| 1095 } |
| 1096 |
| 1097 int64_t charactersToInt64(const LChar* data, size_t length, bool* ok) { |
| 1098 return toIntegralType<int64_t, LChar>( |
| 1099 data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
| 1100 } |
| 1101 |
| 1102 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok) { |
| 1103 return toIntegralType<int64_t, UChar>( |
| 1104 data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
| 1105 } |
| 1106 |
| 1107 uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok) { |
| 1108 return toIntegralType<uint64_t, LChar>( |
| 1109 data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
| 1110 } |
| 1111 |
| 1112 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok) { |
| 1113 return toIntegralType<uint64_t, UChar>( |
| 1114 data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
| 1115 } |
| 1116 |
| 1117 enum TrailingJunkPolicy { DisallowTrailingJunk, AllowTrailingJunk }; |
| 1118 |
| 1119 template <typename CharType, TrailingJunkPolicy policy> |
| 1120 static inline double toDoubleType(const CharType* data, |
| 1121 size_t length, |
| 1122 bool* ok, |
| 1123 size_t& parsedLength) { |
| 1124 size_t leadingSpacesLength = 0; |
| 1125 while (leadingSpacesLength < length && |
| 1126 isASCIISpace(data[leadingSpacesLength])) |
| 1127 ++leadingSpacesLength; |
| 1128 |
| 1129 double number = parseDouble(data + leadingSpacesLength, |
| 1130 length - leadingSpacesLength, parsedLength); |
| 1131 if (!parsedLength) { |
| 999 if (ok) | 1132 if (ok) |
| 1000 *ok = isOk; | 1133 *ok = false; |
| 1001 return isOk ? value : 0; | 1134 return 0.0; |
| 1002 } | 1135 } |
| 1003 | 1136 |
| 1004 template <typename CharType> | 1137 parsedLength += leadingSpacesLength; |
| 1005 static unsigned lengthOfCharactersAsInteger(const CharType* data, size_t length) | 1138 if (ok) |
| 1006 { | 1139 *ok = policy == AllowTrailingJunk || parsedLength == length; |
| 1007 size_t i = 0; | 1140 return number; |
| 1008 | 1141 } |
| 1009 // Allow leading spaces. | 1142 |
| 1010 for (; i != length; ++i) { | 1143 double charactersToDouble(const LChar* data, size_t length, bool* ok) { |
| 1011 if (!isSpaceOrNewline(data[i])) | 1144 size_t parsedLength; |
| 1012 break; | 1145 return toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, |
| 1013 } | 1146 parsedLength); |
| 1014 | 1147 } |
| 1015 // Allow sign. | 1148 |
| 1016 if (i != length && (data[i] == '+' || data[i] == '-')) | 1149 double charactersToDouble(const UChar* data, size_t length, bool* ok) { |
| 1017 ++i; | 1150 size_t parsedLength; |
| 1018 | 1151 return toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, |
| 1019 // Allow digits. | 1152 parsedLength); |
| 1020 for (; i != length; ++i) { | 1153 } |
| 1021 if (!isASCIIDigit(data[i])) | 1154 |
| 1022 break; | 1155 float charactersToFloat(const LChar* data, size_t length, bool* ok) { |
| 1023 } | 1156 // FIXME: This will return ok even when the string fits into a double but |
| 1024 | 1157 // not a float. |
| 1025 return i; | 1158 size_t parsedLength; |
| 1026 } | 1159 return static_cast<float>(toDoubleType<LChar, DisallowTrailingJunk>( |
| 1027 | 1160 data, length, ok, parsedLength)); |
| 1028 int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base) | 1161 } |
| 1029 { | 1162 |
| 1030 return toIntegralType<int, LChar>(data, length, ok, base); | 1163 float charactersToFloat(const UChar* data, size_t length, bool* ok) { |
| 1031 } | 1164 // FIXME: This will return ok even when the string fits into a double but |
| 1032 | 1165 // not a float. |
| 1033 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base) | 1166 size_t parsedLength; |
| 1034 { | 1167 return static_cast<float>(toDoubleType<UChar, DisallowTrailingJunk>( |
| 1035 return toIntegralType<int, UChar>(data, length, ok, base); | 1168 data, length, ok, parsedLength)); |
| 1036 } | 1169 } |
| 1037 | 1170 |
| 1038 unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int
base) | 1171 float charactersToFloat(const LChar* data, |
| 1039 { | 1172 size_t length, |
| 1040 return toIntegralType<unsigned, LChar>(data, length, ok, base); | 1173 size_t& parsedLength) { |
| 1041 } | 1174 // FIXME: This will return ok even when the string fits into a double but |
| 1042 | 1175 // not a float. |
| 1043 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int
base) | 1176 return static_cast<float>( |
| 1044 { | 1177 toDoubleType<LChar, AllowTrailingJunk>(data, length, 0, parsedLength)); |
| 1045 return toIntegralType<unsigned, UChar>(data, length, ok, base); | 1178 } |
| 1046 } | 1179 |
| 1047 | 1180 float charactersToFloat(const UChar* data, |
| 1048 int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int
base) | 1181 size_t length, |
| 1049 { | 1182 size_t& parsedLength) { |
| 1050 return toIntegralType<int64_t, LChar>(data, length, ok, base); | 1183 // FIXME: This will return ok even when the string fits into a double but |
| 1051 } | 1184 // not a float. |
| 1052 | 1185 return static_cast<float>( |
| 1053 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int
base) | 1186 toDoubleType<UChar, AllowTrailingJunk>(data, length, 0, parsedLength)); |
| 1054 { | 1187 } |
| 1055 return toIntegralType<int64_t, UChar>(data, length, ok, base); | 1188 |
| 1056 } | 1189 const String& emptyString() { |
| 1057 | 1190 DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty())); |
| 1058 uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, in
t base) | 1191 return emptyString; |
| 1059 { | 1192 } |
| 1060 return toIntegralType<uint64_t, LChar>(data, length, ok, base); | 1193 |
| 1061 } | 1194 const String& emptyString16Bit() { |
| 1062 | 1195 DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty16Bit())); |
| 1063 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, in
t base) | 1196 return emptyString; |
| 1064 { | 1197 } |
| 1065 return toIntegralType<uint64_t, UChar>(data, length, ok, base); | 1198 |
| 1066 } | 1199 } // namespace WTF |
| 1067 | |
| 1068 int charactersToInt(const LChar* data, size_t length, bool* ok) | |
| 1069 { | |
| 1070 return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(d
ata, length), ok, 10); | |
| 1071 } | |
| 1072 | |
| 1073 int charactersToInt(const UChar* data, size_t length, bool* ok) | |
| 1074 { | |
| 1075 return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger(data, le
ngth), ok, 10); | |
| 1076 } | |
| 1077 | |
| 1078 unsigned charactersToUInt(const LChar* data, size_t length, bool* ok) | |
| 1079 { | |
| 1080 return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LCh
ar>(data, length), ok, 10); | |
| 1081 } | |
| 1082 | |
| 1083 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok) | |
| 1084 { | |
| 1085 return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UCh
ar>(data, length), ok, 10); | |
| 1086 } | |
| 1087 | |
| 1088 int64_t charactersToInt64(const LChar* data, size_t length, bool* ok) | |
| 1089 { | |
| 1090 return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LCha
r>(data, length), ok, 10); | |
| 1091 } | |
| 1092 | |
| 1093 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok) | |
| 1094 { | |
| 1095 return toIntegralType<int64_t, UChar>(data, lengthOfCharactersAsInteger<UCha
r>(data, length), ok, 10); | |
| 1096 } | |
| 1097 | |
| 1098 uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok) | |
| 1099 { | |
| 1100 return toIntegralType<uint64_t, LChar>(data, lengthOfCharactersAsInteger<LCh
ar>(data, length), ok, 10); | |
| 1101 } | |
| 1102 | |
| 1103 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok) | |
| 1104 { | |
| 1105 return toIntegralType<uint64_t, UChar>(data, lengthOfCharactersAsInteger<UCh
ar>(data, length), ok, 10); | |
| 1106 } | |
| 1107 | |
| 1108 enum TrailingJunkPolicy { DisallowTrailingJunk, AllowTrailingJunk }; | |
| 1109 | |
| 1110 template <typename CharType, TrailingJunkPolicy policy> | |
| 1111 static inline double toDoubleType(const CharType* data, size_t length, bool* ok,
size_t& parsedLength) | |
| 1112 { | |
| 1113 size_t leadingSpacesLength = 0; | |
| 1114 while (leadingSpacesLength < length && isASCIISpace(data[leadingSpacesLength
])) | |
| 1115 ++leadingSpacesLength; | |
| 1116 | |
| 1117 double number = parseDouble(data + leadingSpacesLength, length - leadingSpac
esLength, parsedLength); | |
| 1118 if (!parsedLength) { | |
| 1119 if (ok) | |
| 1120 *ok = false; | |
| 1121 return 0.0; | |
| 1122 } | |
| 1123 | |
| 1124 parsedLength += leadingSpacesLength; | |
| 1125 if (ok) | |
| 1126 *ok = policy == AllowTrailingJunk || parsedLength == length; | |
| 1127 return number; | |
| 1128 } | |
| 1129 | |
| 1130 double charactersToDouble(const LChar* data, size_t length, bool* ok) | |
| 1131 { | |
| 1132 size_t parsedLength; | |
| 1133 return toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLen
gth); | |
| 1134 } | |
| 1135 | |
| 1136 double charactersToDouble(const UChar* data, size_t length, bool* ok) | |
| 1137 { | |
| 1138 size_t parsedLength; | |
| 1139 return toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLen
gth); | |
| 1140 } | |
| 1141 | |
| 1142 float charactersToFloat(const LChar* data, size_t length, bool* ok) | |
| 1143 { | |
| 1144 // FIXME: This will return ok even when the string fits into a double but | |
| 1145 // not a float. | |
| 1146 size_t parsedLength; | |
| 1147 return static_cast<float>(toDoubleType<LChar, DisallowTrailingJunk>(data, le
ngth, ok, parsedLength)); | |
| 1148 } | |
| 1149 | |
| 1150 float charactersToFloat(const UChar* data, size_t length, bool* ok) | |
| 1151 { | |
| 1152 // FIXME: This will return ok even when the string fits into a double but | |
| 1153 // not a float. | |
| 1154 size_t parsedLength; | |
| 1155 return static_cast<float>(toDoubleType<UChar, DisallowTrailingJunk>(data, le
ngth, ok, parsedLength)); | |
| 1156 } | |
| 1157 | |
| 1158 float charactersToFloat(const LChar* data, size_t length, size_t& parsedLength) | |
| 1159 { | |
| 1160 // FIXME: This will return ok even when the string fits into a double but | |
| 1161 // not a float. | |
| 1162 return static_cast<float>(toDoubleType<LChar, AllowTrailingJunk>(data, lengt
h, 0, parsedLength)); | |
| 1163 } | |
| 1164 | |
| 1165 float charactersToFloat(const UChar* data, size_t length, size_t& parsedLength) | |
| 1166 { | |
| 1167 // FIXME: This will return ok even when the string fits into a double but | |
| 1168 // not a float. | |
| 1169 return static_cast<float>(toDoubleType<UChar, AllowTrailingJunk>(data, lengt
h, 0, parsedLength)); | |
| 1170 } | |
| 1171 | |
| 1172 const String& emptyString() | |
| 1173 { | |
| 1174 DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty())); | |
| 1175 return emptyString; | |
| 1176 } | |
| 1177 | |
| 1178 const String& emptyString16Bit() | |
| 1179 { | |
| 1180 DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty16Bit())); | |
| 1181 return emptyString; | |
| 1182 } | |
| 1183 | |
| 1184 } // namespace WTF | |
| 1185 | 1200 |
| 1186 #ifndef NDEBUG | 1201 #ifndef NDEBUG |
| 1187 // For use in the debugger | 1202 // For use in the debugger |
| 1188 String* string(const char*); | 1203 String* string(const char*); |
| 1189 Vector<char> asciiDebug(StringImpl*); | 1204 Vector<char> asciiDebug(StringImpl*); |
| 1190 Vector<char> asciiDebug(String&); | 1205 Vector<char> asciiDebug(String&); |
| 1191 | 1206 |
| 1192 void String::show() const | 1207 void String::show() const { |
| 1193 { | 1208 dataLogF("%s\n", asciiDebug(impl()).data()); |
| 1194 dataLogF("%s\n", asciiDebug(impl()).data()); | |
| 1195 } | 1209 } |
| 1196 | 1210 |
| 1197 String* string(const char* s) | 1211 String* string(const char* s) { |
| 1198 { | 1212 // leaks memory! |
| 1199 // leaks memory! | 1213 return new String(s); |
| 1200 return new String(s); | |
| 1201 } | 1214 } |
| 1202 | 1215 |
| 1203 Vector<char> asciiDebug(StringImpl* impl) | 1216 Vector<char> asciiDebug(StringImpl* impl) { |
| 1204 { | 1217 if (!impl) |
| 1205 if (!impl) | 1218 return asciiDebug(String("[null]").impl()); |
| 1206 return asciiDebug(String("[null]").impl()); | |
| 1207 | 1219 |
| 1208 Vector<char> buffer; | 1220 Vector<char> buffer; |
| 1209 for (unsigned i = 0; i < impl->length(); ++i) { | 1221 for (unsigned i = 0; i < impl->length(); ++i) { |
| 1210 UChar ch = (*impl)[i]; | 1222 UChar ch = (*impl)[i]; |
| 1211 if (isASCIIPrintable(ch)) { | 1223 if (isASCIIPrintable(ch)) { |
| 1212 if (ch == '\\') | 1224 if (ch == '\\') |
| 1213 buffer.append('\\'); | 1225 buffer.append('\\'); |
| 1214 buffer.append(static_cast<char>(ch)); | 1226 buffer.append(static_cast<char>(ch)); |
| 1215 } else { | 1227 } else { |
| 1216 buffer.append('\\'); | 1228 buffer.append('\\'); |
| 1217 buffer.append('u'); | 1229 buffer.append('u'); |
| 1218 appendUnsignedAsHexFixedSize(ch, buffer, 4); | 1230 appendUnsignedAsHexFixedSize(ch, buffer, 4); |
| 1219 } | |
| 1220 } | 1231 } |
| 1221 buffer.append('\0'); | 1232 } |
| 1222 return buffer; | 1233 buffer.append('\0'); |
| 1234 return buffer; |
| 1223 } | 1235 } |
| 1224 | 1236 |
| 1225 Vector<char> asciiDebug(String& string) | 1237 Vector<char> asciiDebug(String& string) { |
| 1226 { | 1238 return asciiDebug(string.impl()); |
| 1227 return asciiDebug(string.impl()); | |
| 1228 } | 1239 } |
| 1229 | 1240 |
| 1230 #endif | 1241 #endif |
| OLD | NEW |