Index: Source/WTF/wtf/text/WTFString.cpp |
diff --git a/Source/WTF/wtf/text/WTFString.cpp b/Source/WTF/wtf/text/WTFString.cpp |
deleted file mode 100644 |
index 5a6113b953faff5b47d02c8b5172a405c89ca209..0000000000000000000000000000000000000000 |
--- a/Source/WTF/wtf/text/WTFString.cpp |
+++ /dev/null |
@@ -1,1248 +0,0 @@ |
-/* |
- * (C) 1999 Lars Knoll (knoll@kde.org) |
- * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights reserved. |
- * Copyright (C) 2007-2009 Torch Mobile, Inc. |
- * |
- * This library is free software; you can redistribute it and/or |
- * modify it under the terms of the GNU Library General Public |
- * License as published by the Free Software Foundation; either |
- * version 2 of the License, or (at your option) any later version. |
- * |
- * This library is distributed in the hope that it will be useful, |
- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
- * Library General Public License for more details. |
- * |
- * You should have received a copy of the GNU Library General Public License |
- * along with this library; see the file COPYING.LIB. If not, write to |
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
- * Boston, MA 02110-1301, USA. |
- */ |
- |
-#include "config.h" |
-#include "WTFString.h" |
- |
-#include "IntegerToStringConversion.h" |
-#include <stdarg.h> |
-#include <wtf/ASCIICType.h> |
-#include <wtf/DataLog.h> |
-#include <wtf/HexNumber.h> |
-#include <wtf/MathExtras.h> |
-#include <wtf/text/CString.h> |
-#include <wtf/StringExtras.h> |
-#include <wtf/Vector.h> |
-#include <wtf/dtoa.h> |
-#include <wtf/unicode/CharacterNames.h> |
-#include <wtf/unicode/UTF8.h> |
-#include <wtf/unicode/Unicode.h> |
- |
-using namespace std; |
- |
-namespace WTF { |
- |
-using namespace Unicode; |
-using namespace std; |
- |
-// Construct a string with UTF-16 data. |
-String::String(const UChar* characters, unsigned length) |
- : m_impl(characters ? StringImpl::create(characters, length) : 0) |
-{ |
-} |
- |
-// Construct a string with UTF-16 data, from a null-terminated source. |
-String::String(const UChar* str) |
-{ |
- if (!str) |
- return; |
- |
- size_t len = 0; |
- while (str[len] != UChar(0)) |
- ++len; |
- |
- RELEASE_ASSERT(len <= numeric_limits<unsigned>::max()); |
- |
- m_impl = StringImpl::create(str, len); |
-} |
- |
-// Construct a string with latin1 data. |
-String::String(const LChar* characters, unsigned length) |
- : m_impl(characters ? StringImpl::create(characters, length) : 0) |
-{ |
-} |
- |
-String::String(const char* characters, unsigned length) |
- : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(characters), length) : 0) |
-{ |
-} |
- |
-// Construct a string with latin1 data, from a null-terminated source. |
-String::String(const LChar* characters) |
- : m_impl(characters ? StringImpl::create(characters) : 0) |
-{ |
-} |
- |
-String::String(const char* characters) |
- : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(characters)) : 0) |
-{ |
-} |
- |
-String::String(ASCIILiteral characters) |
- : m_impl(StringImpl::createFromLiteral(characters)) |
-{ |
-} |
- |
-void String::append(const String& str) |
-{ |
- if (str.isEmpty()) |
- return; |
- |
- // FIXME: This is extremely inefficient. So much so that we might want to take this |
- // out of String's API. We can make it better by optimizing the case where exactly |
- // one String is pointing at this StringImpl, but even then it's going to require a |
- // call to fastMalloc every single time. |
- if (str.m_impl) { |
- if (m_impl) { |
- if (m_impl->is8Bit() && str.m_impl->is8Bit()) { |
- LChar* data; |
- RELEASE_ASSERT(str.length() <= numeric_limits<unsigned>::max() - m_impl->length()); |
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data); |
- memcpy(data, m_impl->characters8(), m_impl->length() * sizeof(LChar)); |
- memcpy(data + m_impl->length(), str.characters8(), str.length() * sizeof(LChar)); |
- m_impl = newImpl.release(); |
- return; |
- } |
- UChar* data; |
- RELEASE_ASSERT(str.length() <= numeric_limits<unsigned>::max() - m_impl->length()); |
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data); |
- memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); |
- memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar)); |
- m_impl = newImpl.release(); |
- } else |
- m_impl = str.m_impl; |
- } |
-} |
- |
-void String::append(LChar c) |
-{ |
- // FIXME: This is extremely inefficient. So much so that we might want to take this |
- // out of String's API. We can make it better by optimizing the case where exactly |
- // one String is pointing at this StringImpl, but even then it's going to require a |
- // call to fastMalloc every single time. |
- if (m_impl) { |
- UChar* data; |
- RELEASE_ASSERT(m_impl->length() < numeric_limits<unsigned>::max()); |
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); |
- memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); |
- data[m_impl->length()] = c; |
- m_impl = newImpl.release(); |
- } else |
- m_impl = StringImpl::create(&c, 1); |
-} |
- |
-void String::append(UChar c) |
-{ |
- // FIXME: This is extremely inefficient. So much so that we might want to take this |
- // out of String's API. We can make it better by optimizing the case where exactly |
- // one String is pointing at this StringImpl, but even then it's going to require a |
- // call to fastMalloc every single time. |
- if (m_impl) { |
- UChar* data; |
- RELEASE_ASSERT(m_impl->length() < numeric_limits<unsigned>::max()); |
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); |
- memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); |
- data[m_impl->length()] = c; |
- m_impl = newImpl.release(); |
- } else |
- m_impl = StringImpl::create(&c, 1); |
-} |
- |
-int codePointCompare(const String& a, const String& b) |
-{ |
- return codePointCompare(a.impl(), b.impl()); |
-} |
- |
-void String::insert(const String& str, unsigned pos) |
-{ |
- if (str.isEmpty()) { |
- if (str.isNull()) |
- return; |
- if (isNull()) |
- m_impl = str.impl(); |
- return; |
- } |
- insert(str.characters(), str.length(), pos); |
-} |
- |
-void String::append(const LChar* charactersToAppend, unsigned lengthToAppend) |
-{ |
- if (!m_impl) { |
- if (!charactersToAppend) |
- return; |
- m_impl = StringImpl::create(charactersToAppend, lengthToAppend); |
- return; |
- } |
- |
- if (!lengthToAppend) |
- return; |
- |
- ASSERT(charactersToAppend); |
- |
- unsigned strLength = m_impl->length(); |
- |
- if (m_impl->is8Bit()) { |
- RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength); |
- LChar* data; |
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data); |
- StringImpl::copyChars(data, m_impl->characters8(), strLength); |
- StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); |
- m_impl = newImpl.release(); |
- return; |
- } |
- |
- RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength); |
- UChar* data; |
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data); |
- StringImpl::copyChars(data, m_impl->characters16(), strLength); |
- StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); |
- m_impl = newImpl.release(); |
-} |
- |
-void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) |
-{ |
- if (!m_impl) { |
- if (!charactersToAppend) |
- return; |
- m_impl = StringImpl::create(charactersToAppend, lengthToAppend); |
- return; |
- } |
- |
- if (!lengthToAppend) |
- return; |
- |
- unsigned strLength = m_impl->length(); |
- |
- ASSERT(charactersToAppend); |
- RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength); |
- UChar* data; |
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data); |
- if (m_impl->is8Bit()) |
- StringImpl::copyChars(data, characters8(), strLength); |
- else |
- StringImpl::copyChars(data, characters16(), strLength); |
- StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend); |
- m_impl = newImpl.release(); |
-} |
- |
- |
-void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position) |
-{ |
- if (position >= length()) { |
- append(charactersToInsert, lengthToInsert); |
- return; |
- } |
- |
- ASSERT(m_impl); |
- |
- if (!lengthToInsert) |
- return; |
- |
- ASSERT(charactersToInsert); |
- UChar* data; |
- RELEASE_ASSERT(lengthToInsert <= numeric_limits<unsigned>::max() - length()); |
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToInsert, data); |
- memcpy(data, characters(), position * sizeof(UChar)); |
- memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar)); |
- memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar)); |
- m_impl = newImpl.release(); |
-} |
- |
-UChar32 String::characterStartingAt(unsigned i) const |
-{ |
- if (!m_impl || i >= m_impl->length()) |
- return 0; |
- return m_impl->characterStartingAt(i); |
-} |
- |
-void String::truncate(unsigned position) |
-{ |
- if (position >= length()) |
- return; |
- UChar* data; |
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data); |
- memcpy(data, characters(), position * sizeof(UChar)); |
- m_impl = newImpl.release(); |
-} |
- |
-template <typename CharacterType> |
-inline void String::removeInternal(const CharacterType* characters, unsigned position, int lengthToRemove) |
-{ |
- CharacterType* data; |
- RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data); |
- memcpy(data, characters, position * sizeof(CharacterType)); |
- memcpy(data + position, characters + position + lengthToRemove, |
- (length() - lengthToRemove - position) * sizeof(CharacterType)); |
- |
- m_impl = newImpl.release(); |
-} |
- |
-void String::remove(unsigned position, int lengthToRemove) |
-{ |
- if (lengthToRemove <= 0) |
- return; |
- if (position >= length()) |
- return; |
- if (static_cast<unsigned>(lengthToRemove) > length() - position) |
- lengthToRemove = length() - position; |
- |
- if (is8Bit()) { |
- removeInternal(characters8(), position, lengthToRemove); |
- |
- return; |
- } |
- |
- removeInternal(characters16(), position, lengthToRemove); |
-} |
- |
-String String::substring(unsigned pos, unsigned len) const |
-{ |
- if (!m_impl) |
- return String(); |
- return m_impl->substring(pos, len); |
-} |
- |
-String String::substringSharingImpl(unsigned offset, unsigned length) const |
-{ |
- // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar). |
- |
- unsigned stringLength = this->length(); |
- offset = min(offset, stringLength); |
- length = min(length, stringLength - offset); |
- |
- if (!offset && length == stringLength) |
- return *this; |
- return String(StringImpl::create(m_impl, offset, length)); |
-} |
- |
-String String::lower() const |
-{ |
- if (!m_impl) |
- return String(); |
- return m_impl->lower(); |
-} |
- |
-String String::upper() const |
-{ |
- if (!m_impl) |
- return String(); |
- return m_impl->upper(); |
-} |
- |
-String String::stripWhiteSpace() const |
-{ |
- if (!m_impl) |
- return String(); |
- return m_impl->stripWhiteSpace(); |
-} |
- |
-String String::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const |
-{ |
- if (!m_impl) |
- return String(); |
- return m_impl->stripWhiteSpace(isWhiteSpace); |
-} |
- |
-String String::simplifyWhiteSpace() const |
-{ |
- if (!m_impl) |
- return String(); |
- return m_impl->simplifyWhiteSpace(); |
-} |
- |
-String String::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const |
-{ |
- if (!m_impl) |
- return String(); |
- return m_impl->simplifyWhiteSpace(isWhiteSpace); |
-} |
- |
-String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const |
-{ |
- if (!m_impl) |
- return String(); |
- return m_impl->removeCharacters(findMatch); |
-} |
- |
-String String::foldCase() const |
-{ |
- if (!m_impl) |
- return String(); |
- return m_impl->foldCase(); |
-} |
- |
-bool String::percentage(int& result) const |
-{ |
- if (!m_impl || !m_impl->length()) |
- return false; |
- |
- if ((*m_impl)[m_impl->length() - 1] != '%') |
- return false; |
- |
- result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1); |
- return true; |
-} |
- |
-const UChar* String::charactersWithNullTermination() |
-{ |
- if (!m_impl) |
- return 0; |
- if (m_impl->hasTerminatingNullCharacter()) |
- return m_impl->characters(); |
- m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl); |
- return m_impl->characters(); |
-} |
- |
-String String::format(const char *format, ...) |
-{ |
-#if OS(WINCE) |
- va_list args; |
- va_start(args, format); |
- |
- Vector<char, 256> buffer; |
- |
- int bufferSize = 256; |
- buffer.resize(bufferSize); |
- for (;;) { |
- int written = vsnprintf(buffer.data(), bufferSize, format, args); |
- va_end(args); |
- |
- if (written == 0) |
- return String(""); |
- if (written > 0) |
- return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), written); |
- |
- bufferSize <<= 1; |
- buffer.resize(bufferSize); |
- va_start(args, format); |
- } |
- |
-#else |
- va_list args; |
- va_start(args, format); |
- |
- Vector<char, 256> buffer; |
- |
- // Do the format once to get the length. |
-#if COMPILER(MSVC) |
- int result = _vscprintf(format, args); |
-#else |
- char ch; |
- int result = vsnprintf(&ch, 1, format, args); |
- // We need to call va_end() and then va_start() again here, as the |
- // contents of args is undefined after the call to vsnprintf |
- // according to http://man.cx/snprintf(3) |
- // |
- // Not calling va_end/va_start here happens to work on lots of |
- // systems, but fails e.g. on 64bit Linux. |
- va_end(args); |
- va_start(args, format); |
-#endif |
- |
- if (result == 0) |
- return String(""); |
- if (result < 0) |
- return String(); |
- unsigned len = result; |
- buffer.grow(len + 1); |
- |
- // Now do the formatting again, guaranteed to fit. |
- vsnprintf(buffer.data(), buffer.size(), format, args); |
- |
- va_end(args); |
- |
- return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len); |
-#endif |
-} |
- |
-String String::number(int number) |
-{ |
- return numberToStringSigned<String>(number); |
-} |
- |
-String String::number(unsigned int number) |
-{ |
- return numberToStringUnsigned<String>(number); |
-} |
- |
-String String::number(long number) |
-{ |
- return numberToStringSigned<String>(number); |
-} |
- |
-String String::number(unsigned long number) |
-{ |
- return numberToStringUnsigned<String>(number); |
-} |
- |
-String String::number(long long number) |
-{ |
- return numberToStringSigned<String>(number); |
-} |
- |
-String String::number(unsigned long long number) |
-{ |
- return numberToStringUnsigned<String>(number); |
-} |
- |
-String String::number(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy) |
-{ |
- NumberToStringBuffer buffer; |
- return String(numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros)); |
-} |
- |
-String String::numberToStringECMAScript(double number) |
-{ |
- NumberToStringBuffer buffer; |
- return String(numberToString(number, buffer)); |
-} |
- |
-String String::numberToStringFixedWidth(double number, unsigned decimalPlaces) |
-{ |
- NumberToStringBuffer buffer; |
- return String(numberToFixedWidthString(number, decimalPlaces, buffer)); |
-} |
- |
-int String::toIntStrict(bool* ok, int base) const |
-{ |
- if (!m_impl) { |
- if (ok) |
- *ok = false; |
- return 0; |
- } |
- return m_impl->toIntStrict(ok, base); |
-} |
- |
-unsigned String::toUIntStrict(bool* ok, int base) const |
-{ |
- if (!m_impl) { |
- if (ok) |
- *ok = false; |
- return 0; |
- } |
- return m_impl->toUIntStrict(ok, base); |
-} |
- |
-int64_t String::toInt64Strict(bool* ok, int base) const |
-{ |
- if (!m_impl) { |
- if (ok) |
- *ok = false; |
- return 0; |
- } |
- return m_impl->toInt64Strict(ok, base); |
-} |
- |
-uint64_t String::toUInt64Strict(bool* ok, int base) const |
-{ |
- if (!m_impl) { |
- if (ok) |
- *ok = false; |
- return 0; |
- } |
- return m_impl->toUInt64Strict(ok, base); |
-} |
- |
-intptr_t String::toIntPtrStrict(bool* ok, int base) const |
-{ |
- if (!m_impl) { |
- if (ok) |
- *ok = false; |
- return 0; |
- } |
- return m_impl->toIntPtrStrict(ok, base); |
-} |
- |
-int String::toInt(bool* ok) const |
-{ |
- if (!m_impl) { |
- if (ok) |
- *ok = false; |
- return 0; |
- } |
- return m_impl->toInt(ok); |
-} |
- |
-unsigned String::toUInt(bool* ok) const |
-{ |
- if (!m_impl) { |
- if (ok) |
- *ok = false; |
- return 0; |
- } |
- return m_impl->toUInt(ok); |
-} |
- |
-int64_t String::toInt64(bool* ok) const |
-{ |
- if (!m_impl) { |
- if (ok) |
- *ok = false; |
- return 0; |
- } |
- return m_impl->toInt64(ok); |
-} |
- |
-uint64_t String::toUInt64(bool* ok) const |
-{ |
- if (!m_impl) { |
- if (ok) |
- *ok = false; |
- return 0; |
- } |
- return m_impl->toUInt64(ok); |
-} |
- |
-intptr_t String::toIntPtr(bool* ok) const |
-{ |
- if (!m_impl) { |
- if (ok) |
- *ok = false; |
- return 0; |
- } |
- return m_impl->toIntPtr(ok); |
-} |
- |
-double String::toDouble(bool* ok) const |
-{ |
- if (!m_impl) { |
- if (ok) |
- *ok = false; |
- return 0.0; |
- } |
- return m_impl->toDouble(ok); |
-} |
- |
-float String::toFloat(bool* ok) const |
-{ |
- if (!m_impl) { |
- if (ok) |
- *ok = false; |
- return 0.0f; |
- } |
- return m_impl->toFloat(ok); |
-} |
- |
-String String::isolatedCopy() const |
-{ |
- if (!m_impl) |
- return String(); |
- return m_impl->isolatedCopy(); |
-} |
- |
-bool String::isSafeToSendToAnotherThread() const |
-{ |
- if (!impl()) |
- return true; |
- // AtomicStrings are not safe to send between threads as ~StringImpl() |
- // will try to remove them from the wrong AtomicStringTable. |
- if (impl()->isAtomic()) |
- return false; |
- if (impl()->hasOneRef()) |
- return true; |
- if (isEmpty()) |
- return true; |
- return false; |
-} |
- |
-void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const |
-{ |
- result.clear(); |
- |
- unsigned startPos = 0; |
- size_t endPos; |
- while ((endPos = find(separator, startPos)) != notFound) { |
- if (allowEmptyEntries || startPos != endPos) |
- result.append(substring(startPos, endPos - startPos)); |
- startPos = endPos + separator.length(); |
- } |
- if (allowEmptyEntries || startPos != length()) |
- result.append(substring(startPos)); |
-} |
- |
-void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const |
-{ |
- result.clear(); |
- |
- unsigned startPos = 0; |
- size_t endPos; |
- while ((endPos = find(separator, startPos)) != notFound) { |
- if (allowEmptyEntries || startPos != endPos) |
- result.append(substring(startPos, endPos - startPos)); |
- startPos = endPos + 1; |
- } |
- if (allowEmptyEntries || startPos != length()) |
- result.append(substring(startPos)); |
-} |
- |
-CString String::ascii() const |
-{ |
- // Printable ASCII characters 32..127 and the null character are |
- // preserved, characters outside of this range are converted to '?'. |
- |
- unsigned length = this->length(); |
- if (!length) { |
- char* characterBuffer; |
- return CString::newUninitialized(length, characterBuffer); |
- } |
- |
- if (this->is8Bit()) { |
- const LChar* characters = this->characters8(); |
- |
- char* characterBuffer; |
- CString result = CString::newUninitialized(length, characterBuffer); |
- |
- for (unsigned i = 0; i < length; ++i) { |
- LChar ch = characters[i]; |
- characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; |
- } |
- |
- return result; |
- } |
- |
- const UChar* characters = this->characters16(); |
- |
- char* characterBuffer; |
- CString result = CString::newUninitialized(length, characterBuffer); |
- |
- for (unsigned i = 0; i < length; ++i) { |
- UChar ch = characters[i]; |
- characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; |
- } |
- |
- return result; |
-} |
- |
-CString String::latin1() const |
-{ |
- // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are |
- // preserved, characters outside of this range are converted to '?'. |
- |
- unsigned length = this->length(); |
- |
- if (!length) |
- return CString("", 0); |
- |
- if (is8Bit()) |
- return CString(reinterpret_cast<const char*>(this->characters8()), length); |
- |
- const UChar* characters = this->characters16(); |
- |
- char* characterBuffer; |
- CString result = CString::newUninitialized(length, characterBuffer); |
- |
- for (unsigned i = 0; i < length; ++i) { |
- UChar ch = characters[i]; |
- characterBuffer[i] = ch > 0xff ? '?' : ch; |
- } |
- |
- return result; |
-} |
- |
-// Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available. |
-static inline void putUTF8Triple(char*& buffer, UChar ch) |
-{ |
- ASSERT(ch >= 0x0800); |
- *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); |
- *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); |
- *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); |
-} |
- |
-CString String::utf8(ConversionMode mode) const |
-{ |
- unsigned length = this->length(); |
- |
- if (!length) |
- return CString("", 0); |
- |
- // Allocate a buffer big enough to hold all the characters |
- // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). |
- // Optimization ideas, if we find this function is hot: |
- // * We could speculatively create a CStringBuffer to contain 'length' |
- // characters, and resize if necessary (i.e. if the buffer contains |
- // non-ascii characters). (Alternatively, scan the buffer first for |
- // ascii characters, so we know this will be sufficient). |
- // * We could allocate a CStringBuffer with an appropriate size to |
- // have a good chance of being able to write the string into the |
- // buffer without reallocing (say, 1.5 x length). |
- if (length > numeric_limits<unsigned>::max() / 3) |
- return CString(); |
- Vector<char, 1024> bufferVector(length * 3); |
- |
- char* buffer = bufferVector.data(); |
- |
- if (is8Bit()) { |
- const LChar* characters = this->characters8(); |
- |
- ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size()); |
- ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion |
- } else { |
- const UChar* characters = this->characters16(); |
- |
- if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) { |
- const UChar* charactersEnd = characters + length; |
- char* bufferEnd = buffer + bufferVector.size(); |
- while (characters < charactersEnd) { |
- // Use strict conversion to detect unpaired surrogates. |
- ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, &buffer, bufferEnd, true); |
- ASSERT(result != targetExhausted); |
- // Conversion fails when there is an unpaired surrogate. |
- // Put replacement character (U+FFFD) instead of the unpaired surrogate. |
- if (result != conversionOK) { |
- ASSERT((0xD800 <= *characters && *characters <= 0xDFFF)); |
- // There should be room left, since one UChar hasn't been converted. |
- ASSERT((buffer + 3) <= bufferEnd); |
- putUTF8Triple(buffer, replacementCharacter); |
- ++characters; |
- } |
- } |
- } else { |
- bool strict = mode == StrictConversion; |
- ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict); |
- ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion |
- |
- // Only produced from strict conversion. |
- if (result == sourceIllegal) { |
- ASSERT(strict); |
- return CString(); |
- } |
- |
- // Check for an unconverted high surrogate. |
- if (result == sourceExhausted) { |
- if (strict) |
- return CString(); |
- // This should be one unpaired high surrogate. Treat it the same |
- // was as an unpaired high surrogate would have been handled in |
- // the middle of a string with non-strict conversion - which is |
- // to say, simply encode it to UTF-8. |
- ASSERT((characters + 1) == (this->characters() + length)); |
- ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF)); |
- // There should be room left, since one UChar hasn't been converted. |
- ASSERT((buffer + 3) <= (buffer + bufferVector.size())); |
- putUTF8Triple(buffer, *characters); |
- } |
- } |
- } |
- |
- return CString(bufferVector.data(), buffer - bufferVector.data()); |
-} |
- |
-String String::make8BitFrom16BitSource(const UChar* source, size_t length) |
-{ |
- if (!length) |
- return String(); |
- |
- LChar* destination; |
- String result = String::createUninitialized(length, destination); |
- |
- copyLCharsFromUCharSource(destination, source, length); |
- |
- return result; |
-} |
- |
-String String::make16BitFrom8BitSource(const LChar* source, size_t length) |
-{ |
- if (!length) |
- return String(); |
- |
- UChar* destination; |
- String result = String::createUninitialized(length, destination); |
- |
- StringImpl::copyChars(destination, source, length); |
- |
- return result; |
-} |
- |
-String String::fromUTF8(const LChar* stringStart, size_t length) |
-{ |
- RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); |
- |
- if (!stringStart) |
- return String(); |
- |
- if (!length) |
- return emptyString(); |
- |
- // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be |
- // the right length, if there are any multi-byte sequences this buffer will be too large. |
- UChar* buffer; |
- String stringBuffer(StringImpl::createUninitialized(length, buffer)); |
- UChar* bufferEnd = buffer + length; |
- |
- // Try converting into the buffer. |
- const char* stringCurrent = reinterpret_cast<const char*>(stringStart); |
- bool isAllASCII; |
- if (convertUTF8ToUTF16(&stringCurrent, reinterpret_cast<const char *>(stringStart + length), &buffer, bufferEnd, &isAllASCII) != conversionOK) |
- return String(); |
- |
- if (isAllASCII) |
- return String(stringStart, length); |
- |
- // stringBuffer is full (the input must have been all ascii) so just return it! |
- if (buffer == bufferEnd) |
- return stringBuffer; |
- |
- // stringBuffer served its purpose as a buffer, copy the contents out into a new string. |
- unsigned utf16Length = buffer - stringBuffer.characters(); |
- ASSERT(utf16Length < length); |
- return String(stringBuffer.characters(), utf16Length); |
-} |
- |
-String String::fromUTF8(const LChar* string) |
-{ |
- if (!string) |
- return String(); |
- return fromUTF8(string, strlen(reinterpret_cast<const char*>(string))); |
-} |
- |
-String String::fromUTF8(const CString& s) |
-{ |
- return fromUTF8(s.data()); |
-} |
- |
-String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size) |
-{ |
- String utf8 = fromUTF8(string, size); |
- if (!utf8) |
- return String(string, size); |
- return utf8; |
-} |
- |
-// String Operations |
- |
-static bool isCharacterAllowedInBase(UChar c, int base) |
-{ |
- if (c > 0x7F) |
- return false; |
- if (isASCIIDigit(c)) |
- return c - '0' < base; |
- if (isASCIIAlpha(c)) { |
- if (base > 36) |
- base = 36; |
- return (c >= 'a' && c < 'a' + base - 10) |
- || (c >= 'A' && c < 'A' + base - 10); |
- } |
- return false; |
-} |
- |
-template <typename IntegralType, typename CharType> |
-static inline IntegralType toIntegralType(const CharType* data, size_t length, bool* ok, int base) |
-{ |
- static const IntegralType integralMax = numeric_limits<IntegralType>::max(); |
- static const bool isSigned = numeric_limits<IntegralType>::is_signed; |
- const IntegralType maxMultiplier = integralMax / base; |
- |
- IntegralType value = 0; |
- bool isOk = false; |
- bool isNegative = false; |
- |
- if (!data) |
- goto bye; |
- |
- // skip leading whitespace |
- while (length && isSpaceOrNewline(*data)) { |
- --length; |
- ++data; |
- } |
- |
- if (isSigned && length && *data == '-') { |
- --length; |
- ++data; |
- isNegative = true; |
- } else if (length && *data == '+') { |
- --length; |
- ++data; |
- } |
- |
- if (!length || !isCharacterAllowedInBase(*data, base)) |
- goto bye; |
- |
- while (length && isCharacterAllowedInBase(*data, base)) { |
- --length; |
- IntegralType digitValue; |
- CharType c = *data; |
- if (isASCIIDigit(c)) |
- digitValue = c - '0'; |
- else if (c >= 'a') |
- digitValue = c - 'a' + 10; |
- else |
- digitValue = c - 'A' + 10; |
- |
- if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative)) |
- goto bye; |
- |
- value = base * value + digitValue; |
- ++data; |
- } |
- |
-#if COMPILER(MSVC) |
-#pragma warning(push, 0) |
-#pragma warning(disable:4146) |
-#endif |
- |
- if (isNegative) |
- value = -value; |
- |
-#if COMPILER(MSVC) |
-#pragma warning(pop) |
-#endif |
- |
- // skip trailing space |
- while (length && isSpaceOrNewline(*data)) { |
- --length; |
- ++data; |
- } |
- |
- if (!length) |
- isOk = true; |
-bye: |
- if (ok) |
- *ok = isOk; |
- return isOk ? value : 0; |
-} |
- |
-template <typename CharType> |
-static unsigned lengthOfCharactersAsInteger(const CharType* data, size_t length) |
-{ |
- size_t i = 0; |
- |
- // Allow leading spaces. |
- for (; i != length; ++i) { |
- if (!isSpaceOrNewline(data[i])) |
- break; |
- } |
- |
- // Allow sign. |
- if (i != length && (data[i] == '+' || data[i] == '-')) |
- ++i; |
- |
- // Allow digits. |
- for (; i != length; ++i) { |
- if (!isASCIIDigit(data[i])) |
- break; |
- } |
- |
- return i; |
-} |
- |
-int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base) |
-{ |
- return toIntegralType<int, LChar>(data, length, ok, base); |
-} |
- |
-int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base) |
-{ |
- return toIntegralType<int, UChar>(data, length, ok, base); |
-} |
- |
-unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int base) |
-{ |
- return toIntegralType<unsigned, LChar>(data, length, ok, base); |
-} |
- |
-unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base) |
-{ |
- return toIntegralType<unsigned, UChar>(data, length, ok, base); |
-} |
- |
-int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int base) |
-{ |
- return toIntegralType<int64_t, LChar>(data, length, ok, base); |
-} |
- |
-int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base) |
-{ |
- return toIntegralType<int64_t, UChar>(data, length, ok, base); |
-} |
- |
-uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, int base) |
-{ |
- return toIntegralType<uint64_t, LChar>(data, length, ok, base); |
-} |
- |
-uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base) |
-{ |
- return toIntegralType<uint64_t, UChar>(data, length, ok, base); |
-} |
- |
-intptr_t charactersToIntPtrStrict(const LChar* data, size_t length, bool* ok, int base) |
-{ |
- return toIntegralType<intptr_t, LChar>(data, length, ok, base); |
-} |
- |
-intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base) |
-{ |
- return toIntegralType<intptr_t, UChar>(data, length, ok, base); |
-} |
- |
-int charactersToInt(const LChar* data, size_t length, bool* ok) |
-{ |
- return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
-} |
- |
-int charactersToInt(const UChar* data, size_t length, bool* ok) |
-{ |
- return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger(data, length), ok, 10); |
-} |
- |
-unsigned charactersToUInt(const LChar* data, size_t length, bool* ok) |
-{ |
- return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
-} |
- |
-unsigned charactersToUInt(const UChar* data, size_t length, bool* ok) |
-{ |
- return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
-} |
- |
-int64_t charactersToInt64(const LChar* data, size_t length, bool* ok) |
-{ |
- return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
-} |
- |
-int64_t charactersToInt64(const UChar* data, size_t length, bool* ok) |
-{ |
- return toIntegralType<int64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
-} |
- |
-uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok) |
-{ |
- return toIntegralType<uint64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
-} |
- |
-uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok) |
-{ |
- return toIntegralType<uint64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
-} |
- |
-intptr_t charactersToIntPtr(const LChar* data, size_t length, bool* ok) |
-{ |
- return toIntegralType<intptr_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
-} |
- |
-intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok) |
-{ |
- return toIntegralType<intptr_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
-} |
- |
-enum TrailingJunkPolicy { DisallowTrailingJunk, AllowTrailingJunk }; |
- |
-template <typename CharType, TrailingJunkPolicy policy> |
-static inline double toDoubleType(const CharType* data, size_t length, bool* ok, size_t& parsedLength) |
-{ |
- size_t leadingSpacesLength = 0; |
- while (leadingSpacesLength < length && isASCIISpace(data[leadingSpacesLength])) |
- ++leadingSpacesLength; |
- |
- double number = parseDouble(data + leadingSpacesLength, length - leadingSpacesLength, parsedLength); |
- if (!parsedLength) { |
- if (ok) |
- *ok = false; |
- return 0.0; |
- } |
- |
- parsedLength += leadingSpacesLength; |
- if (ok) |
- *ok = policy == AllowTrailingJunk || parsedLength == length; |
- return number; |
-} |
- |
-double charactersToDouble(const LChar* data, size_t length, bool* ok) |
-{ |
- size_t parsedLength; |
- return toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength); |
-} |
- |
-double charactersToDouble(const UChar* data, size_t length, bool* ok) |
-{ |
- size_t parsedLength; |
- return toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength); |
-} |
- |
-float charactersToFloat(const LChar* data, size_t length, bool* ok) |
-{ |
- // FIXME: This will return ok even when the string fits into a double but not a float. |
- size_t parsedLength; |
- return static_cast<float>(toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength)); |
-} |
- |
-float charactersToFloat(const UChar* data, size_t length, bool* ok) |
-{ |
- // FIXME: This will return ok even when the string fits into a double but not a float. |
- size_t parsedLength; |
- return static_cast<float>(toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength)); |
-} |
- |
-float charactersToFloat(const LChar* data, size_t length, size_t& parsedLength) |
-{ |
- // FIXME: This will return ok even when the string fits into a double but not a float. |
- return static_cast<float>(toDoubleType<LChar, AllowTrailingJunk>(data, length, 0, parsedLength)); |
-} |
- |
-float charactersToFloat(const UChar* data, size_t length, size_t& parsedLength) |
-{ |
- // FIXME: This will return ok even when the string fits into a double but not a float. |
- return static_cast<float>(toDoubleType<UChar, AllowTrailingJunk>(data, length, 0, parsedLength)); |
-} |
- |
-const String& emptyString() |
-{ |
- DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty())); |
- return emptyString; |
-} |
- |
-} // namespace WTF |
- |
-#ifndef NDEBUG |
-// For use in the debugger |
-String* string(const char*); |
-Vector<char> asciiDebug(StringImpl* impl); |
-Vector<char> asciiDebug(String& string); |
- |
-void String::show() const |
-{ |
- dataLogF("%s\n", asciiDebug(impl()).data()); |
-} |
- |
-String* string(const char* s) |
-{ |
- // leaks memory! |
- return new String(s); |
-} |
- |
-Vector<char> asciiDebug(StringImpl* impl) |
-{ |
- if (!impl) |
- return asciiDebug(String("[null]").impl()); |
- |
- Vector<char> buffer; |
- for (unsigned i = 0; i < impl->length(); ++i) { |
- UChar ch = (*impl)[i]; |
- if (isASCIIPrintable(ch)) { |
- if (ch == '\\') |
- buffer.append(ch); |
- buffer.append(ch); |
- } else { |
- buffer.append('\\'); |
- buffer.append('u'); |
- appendUnsignedAsHexFixedSize(ch, buffer, 4); |
- } |
- } |
- buffer.append('\0'); |
- return buffer; |
-} |
- |
-Vector<char> asciiDebug(String& string) |
-{ |
- return asciiDebug(string.impl()); |
-} |
- |
-#endif |