| Index: third_party/WebKit/Source/wtf/text/StringImpl.cpp
|
| diff --git a/third_party/WebKit/Source/wtf/text/StringImpl.cpp b/third_party/WebKit/Source/wtf/text/StringImpl.cpp
|
| deleted file mode 100644
|
| index 684b01b4840938a6777ff63fae2ace9b298edf49..0000000000000000000000000000000000000000
|
| --- a/third_party/WebKit/Source/wtf/text/StringImpl.cpp
|
| +++ /dev/null
|
| @@ -1,2244 +0,0 @@
|
| -/*
|
| - * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
|
| - * (C) 1999 Antti Koivisto (koivisto@kde.org)
|
| - * (C) 2001 Dirk Mueller ( mueller@kde.org )
|
| - * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All
|
| - * rights reserved.
|
| - * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
|
| - *
|
| - * This library is free software; you can redistribute it and/or
|
| - * modify it under the terms of the GNU Library General Public
|
| - * License as published by the Free Software Foundation; either
|
| - * version 2 of the License, or (at your option) any later version.
|
| - *
|
| - * This library is distributed in the hope that it will be useful,
|
| - * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| - * Library General Public License for more details.
|
| - *
|
| - * You should have received a copy of the GNU Library General Public License
|
| - * along with this library; see the file COPYING.LIB. If not, write to
|
| - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
| - * Boston, MA 02110-1301, USA.
|
| - *
|
| - */
|
| -
|
| -#include "wtf/text/StringImpl.h"
|
| -
|
| -#include "wtf/DynamicAnnotations.h"
|
| -#include "wtf/LeakAnnotations.h"
|
| -#include "wtf/PtrUtil.h"
|
| -#include "wtf/StaticConstructors.h"
|
| -#include "wtf/StdLibExtras.h"
|
| -#include "wtf/allocator/Partitions.h"
|
| -#include "wtf/text/AtomicString.h"
|
| -#include "wtf/text/AtomicStringTable.h"
|
| -#include "wtf/text/CString.h"
|
| -#include "wtf/text/CharacterNames.h"
|
| -#include "wtf/text/StringBuffer.h"
|
| -#include "wtf/text/StringHash.h"
|
| -#include "wtf/text/StringToNumber.h"
|
| -#include <algorithm>
|
| -#include <memory>
|
| -
|
| -#ifdef STRING_STATS
|
| -#include "wtf/DataLog.h"
|
| -#include "wtf/HashMap.h"
|
| -#include "wtf/HashSet.h"
|
| -#include "wtf/RefCounted.h"
|
| -#include "wtf/ThreadingPrimitives.h"
|
| -#include <unistd.h>
|
| -#endif
|
| -
|
| -using namespace std;
|
| -
|
| -namespace WTF {
|
| -
|
| -using namespace Unicode;
|
| -
|
| -// As of Jan 2017, StringImpl needs 2 * sizeof(int) + 29 bits of data, and
|
| -// sizeof(ThreadRestrictionVerifier) is 16 bytes. Thus, in DCHECK mode the
|
| -// class may be padded to 32 bytes.
|
| -#if DCHECK_IS_ON()
|
| -static_assert(sizeof(StringImpl) <= 8 * sizeof(int),
|
| - "StringImpl should stay small");
|
| -#else
|
| -static_assert(sizeof(StringImpl) <= 3 * sizeof(int),
|
| - "StringImpl should stay small");
|
| -#endif
|
| -
|
| -#ifdef STRING_STATS
|
| -
|
| -static Mutex& statsMutex() {
|
| - DEFINE_STATIC_LOCAL(Mutex, mutex, ());
|
| - return mutex;
|
| -}
|
| -
|
| -static HashSet<void*>& liveStrings() {
|
| - // Notice that we can't use HashSet<StringImpl*> because then HashSet would
|
| - // dedup identical strings.
|
| - DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
|
| - return strings;
|
| -}
|
| -
|
| -void addStringForStats(StringImpl* string) {
|
| - MutexLocker locker(statsMutex());
|
| - liveStrings().add(string);
|
| -}
|
| -
|
| -void removeStringForStats(StringImpl* string) {
|
| - MutexLocker locker(statsMutex());
|
| - liveStrings().remove(string);
|
| -}
|
| -
|
| -static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) {
|
| - const unsigned kMaxSnippetLength = 64;
|
| - snippet.clear();
|
| -
|
| - size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
|
| - if (expectedLength == kMaxSnippetLength)
|
| - expectedLength += 3; // For the "...".
|
| - ++expectedLength; // For the terminating '\0'.
|
| - snippet.reserveCapacity(expectedLength);
|
| -
|
| - size_t i;
|
| - for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
|
| - UChar c = (*string)[i];
|
| - if (isASCIIPrintable(c))
|
| - snippet.append(c);
|
| - else
|
| - snippet.append('?');
|
| - }
|
| - if (i < string->length()) {
|
| - snippet.append('.');
|
| - snippet.append('.');
|
| - snippet.append('.');
|
| - }
|
| - snippet.append('\0');
|
| -}
|
| -
|
| -static bool isUnnecessarilyWide(const StringImpl* string) {
|
| - if (string->is8Bit())
|
| - return false;
|
| - UChar c = 0;
|
| - for (unsigned i = 0; i < string->length(); ++i)
|
| - c |= (*string)[i] >> 8;
|
| - return !c;
|
| -}
|
| -
|
| -class PerStringStats : public RefCounted<PerStringStats> {
|
| - public:
|
| - static PassRefPtr<PerStringStats> create() {
|
| - return adoptRef(new PerStringStats);
|
| - }
|
| -
|
| - void add(const StringImpl* string) {
|
| - ++m_numberOfCopies;
|
| - if (!m_length) {
|
| - m_length = string->length();
|
| - fillWithSnippet(string, m_snippet);
|
| - }
|
| - if (string->isAtomic())
|
| - ++m_numberOfAtomicCopies;
|
| - if (isUnnecessarilyWide(string))
|
| - m_unnecessarilyWide = true;
|
| - }
|
| -
|
| - size_t totalCharacters() const { return m_numberOfCopies * m_length; }
|
| -
|
| - void print() {
|
| - const char* status = "ok";
|
| - if (m_unnecessarilyWide)
|
| - status = "16";
|
| - dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status,
|
| - m_length, m_snippet.data());
|
| - }
|
| -
|
| - bool m_unnecessarilyWide;
|
| - unsigned m_numberOfCopies;
|
| - unsigned m_length;
|
| - unsigned m_numberOfAtomicCopies;
|
| - Vector<char> m_snippet;
|
| -
|
| - private:
|
| - PerStringStats()
|
| - : m_unnecessarilyWide(false),
|
| - m_numberOfCopies(0),
|
| - m_length(0),
|
| - m_numberOfAtomicCopies(0) {}
|
| -};
|
| -
|
| -bool operator<(const RefPtr<PerStringStats>& a,
|
| - const RefPtr<PerStringStats>& b) {
|
| - if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
|
| - return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
|
| - if (a->totalCharacters() != b->totalCharacters())
|
| - return a->totalCharacters() < b->totalCharacters();
|
| - if (a->m_numberOfCopies != b->m_numberOfCopies)
|
| - return a->m_numberOfCopies < b->m_numberOfCopies;
|
| - if (a->m_length != b->m_length)
|
| - return a->m_length < b->m_length;
|
| - return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
|
| -}
|
| -
|
| -static void printLiveStringStats(void*) {
|
| - MutexLocker locker(statsMutex());
|
| - HashSet<void*>& strings = liveStrings();
|
| -
|
| - HashMap<StringImpl*, RefPtr<PerStringStats>> stats;
|
| - for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end();
|
| - ++iter) {
|
| - StringImpl* string = static_cast<StringImpl*>(*iter);
|
| - HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator entry =
|
| - stats.find(string);
|
| - RefPtr<PerStringStats> value =
|
| - entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create())
|
| - : entry->value;
|
| - value->add(string);
|
| - stats.set(string, value.release());
|
| - }
|
| -
|
| - Vector<RefPtr<PerStringStats>> all;
|
| - for (HashMap<StringImpl*, RefPtr<PerStringStats>>::iterator iter =
|
| - stats.begin();
|
| - iter != stats.end(); ++iter)
|
| - all.append(iter->value);
|
| -
|
| - std::sort(all.begin(), all.end());
|
| - std::reverse(all.begin(), all.end());
|
| - for (size_t i = 0; i < 20 && i < all.size(); ++i)
|
| - all[i]->print();
|
| -}
|
| -
|
| -StringStats StringImpl::m_stringStats;
|
| -
|
| -unsigned StringStats::s_stringRemovesTillPrintStats =
|
| - StringStats::s_printStringStatsFrequency;
|
| -
|
| -void StringStats::removeString(StringImpl* string) {
|
| - unsigned length = string->length();
|
| - --m_totalNumberStrings;
|
| -
|
| - if (string->is8Bit()) {
|
| - --m_number8BitStrings;
|
| - m_total8BitData -= length;
|
| - } else {
|
| - --m_number16BitStrings;
|
| - m_total16BitData -= length;
|
| - }
|
| -
|
| - if (!--s_stringRemovesTillPrintStats) {
|
| - s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
|
| - printStats();
|
| - }
|
| -}
|
| -
|
| -void StringStats::printStats() {
|
| - dataLogF("String stats for process id %d:\n", getpid());
|
| -
|
| - unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
|
| - double percent8Bit =
|
| - m_totalNumberStrings
|
| - ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings
|
| - : 0.0;
|
| - double average8bitLength =
|
| - m_number8BitStrings
|
| - ? (double)m_total8BitData / (double)m_number8BitStrings
|
| - : 0.0;
|
| - dataLogF(
|
| - "%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length "
|
| - "%6.1f\n",
|
| - m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData,
|
| - average8bitLength);
|
| -
|
| - double percent16Bit =
|
| - m_totalNumberStrings
|
| - ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings
|
| - : 0.0;
|
| - double average16bitLength =
|
| - m_number16BitStrings
|
| - ? (double)m_total16BitData / (double)m_number16BitStrings
|
| - : 0.0;
|
| - dataLogF(
|
| - "%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length "
|
| - "%6.1f\n",
|
| - m_number16BitStrings, percent16Bit, m_total16BitData,
|
| - m_total16BitData * 2, average16bitLength);
|
| -
|
| - double averageLength =
|
| - m_totalNumberStrings
|
| - ? (double)totalNumberCharacters / (double)m_totalNumberStrings
|
| - : 0.0;
|
| - unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
|
| - dataLogF(
|
| - "%8u Total %12llu chars %12llu bytes avg length "
|
| - "%6.1f\n",
|
| - m_totalNumberStrings, totalNumberCharacters, totalDataBytes,
|
| - averageLength);
|
| - unsigned long long totalSavedBytes = m_total8BitData;
|
| - double percentSavings = totalSavedBytes
|
| - ? ((double)totalSavedBytes * 100) /
|
| - (double)(totalDataBytes + totalSavedBytes)
|
| - : 0.0;
|
| - dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes,
|
| - percentSavings);
|
| -
|
| - unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
|
| - double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
|
| - dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead,
|
| - overheadPercent);
|
| -
|
| - internal::callOnMainThread(&printLiveStringStats, nullptr);
|
| -}
|
| -#endif
|
| -
|
| -void* StringImpl::operator new(size_t size) {
|
| - DCHECK_EQ(size, sizeof(StringImpl));
|
| - return Partitions::bufferMalloc(size, "WTF::StringImpl");
|
| -}
|
| -
|
| -void StringImpl::operator delete(void* ptr) {
|
| - Partitions::bufferFree(ptr);
|
| -}
|
| -
|
| -inline StringImpl::~StringImpl() {
|
| - DCHECK(!isStatic());
|
| -
|
| - STRING_STATS_REMOVE_STRING(this);
|
| -
|
| - if (isAtomic())
|
| - AtomicStringTable::instance().remove(this);
|
| -}
|
| -
|
| -void StringImpl::destroyIfNotStatic() const {
|
| - if (!isStatic())
|
| - delete this;
|
| -}
|
| -
|
| -void StringImpl::updateContainsOnlyASCII() const {
|
| - m_containsOnlyASCII = is8Bit()
|
| - ? charactersAreAllASCII(characters8(), length())
|
| - : charactersAreAllASCII(characters16(), length());
|
| - m_needsASCIICheck = false;
|
| -}
|
| -
|
| -bool StringImpl::isSafeToSendToAnotherThread() const {
|
| - if (isStatic())
|
| - return true;
|
| - // AtomicStrings are not safe to send between threads as ~StringImpl()
|
| - // will try to remove them from the wrong AtomicStringTable.
|
| - if (isAtomic())
|
| - return false;
|
| - if (hasOneRef())
|
| - return true;
|
| - return false;
|
| -}
|
| -
|
| -#if DCHECK_IS_ON()
|
| -std::string StringImpl::asciiForDebugging() const {
|
| - CString ascii = String(isolatedCopy()->substring(0, 128)).ascii();
|
| - return std::string(ascii.data(), ascii.length());
|
| -}
|
| -#endif
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length,
|
| - LChar*& data) {
|
| - if (!length) {
|
| - data = 0;
|
| - return empty;
|
| - }
|
| -
|
| - // Allocate a single buffer large enough to contain the StringImpl
|
| - // struct as well as the data which it contains. This removes one
|
| - // heap allocation from this call.
|
| - StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(
|
| - allocationSize<LChar>(length), "WTF::StringImpl"));
|
| -
|
| - data = reinterpret_cast<LChar*>(string + 1);
|
| - return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length,
|
| - UChar*& data) {
|
| - if (!length) {
|
| - data = 0;
|
| - return empty;
|
| - }
|
| -
|
| - // Allocate a single buffer large enough to contain the StringImpl
|
| - // struct as well as the data which it contains. This removes one
|
| - // heap allocation from this call.
|
| - StringImpl* string = static_cast<StringImpl*>(Partitions::bufferMalloc(
|
| - allocationSize<UChar>(length), "WTF::StringImpl"));
|
| -
|
| - data = reinterpret_cast<UChar*>(string + 1);
|
| - return adoptRef(new (string) StringImpl(length));
|
| -}
|
| -
|
| -static StaticStringsTable& staticStrings() {
|
| - DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
|
| - return staticStrings;
|
| -}
|
| -
|
| -#if DCHECK_IS_ON()
|
| -static bool s_allowCreationOfStaticStrings = true;
|
| -#endif
|
| -
|
| -const StaticStringsTable& StringImpl::allStaticStrings() {
|
| - return staticStrings();
|
| -}
|
| -
|
| -void StringImpl::freezeStaticStrings() {
|
| - DCHECK(isMainThread());
|
| -
|
| -#if DCHECK_IS_ON()
|
| - s_allowCreationOfStaticStrings = false;
|
| -#endif
|
| -}
|
| -
|
| -unsigned StringImpl::m_highestStaticStringLength = 0;
|
| -
|
| -DEFINE_GLOBAL(StringImpl, globalEmpty);
|
| -DEFINE_GLOBAL(StringImpl, globalEmpty16Bit);
|
| -// Callers need the global empty strings to be non-const.
|
| -StringImpl* StringImpl::empty = const_cast<StringImpl*>(&globalEmpty);
|
| -StringImpl* StringImpl::empty16Bit = const_cast<StringImpl*>(&globalEmpty16Bit);
|
| -void StringImpl::initStatics() {
|
| - new ((void*)empty) StringImpl(ConstructEmptyString);
|
| - new ((void*)empty16Bit) StringImpl(ConstructEmptyString16Bit);
|
| - WTF_ANNOTATE_BENIGN_RACE(StringImpl::empty,
|
| - "Benign race on the reference counter of a static "
|
| - "string created by StringImpl::empty");
|
| - WTF_ANNOTATE_BENIGN_RACE(StringImpl::empty16Bit,
|
| - "Benign race on the reference counter of a static "
|
| - "string created by StringImpl::empty16Bit");
|
| -}
|
| -
|
| -StringImpl* StringImpl::createStatic(const char* string,
|
| - unsigned length,
|
| - unsigned hash) {
|
| -#if DCHECK_IS_ON()
|
| - DCHECK(s_allowCreationOfStaticStrings);
|
| -#endif
|
| - DCHECK(string);
|
| - DCHECK(length);
|
| -
|
| - StaticStringsTable::const_iterator it = staticStrings().find(hash);
|
| - if (it != staticStrings().end()) {
|
| - DCHECK(!memcmp(string, it->value + 1, length * sizeof(LChar)));
|
| - return it->value;
|
| - }
|
| -
|
| - // Allocate a single buffer large enough to contain the StringImpl
|
| - // struct as well as the data which it contains. This removes one
|
| - // heap allocation from this call.
|
| - RELEASE_ASSERT(length <=
|
| - ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) /
|
| - sizeof(LChar)));
|
| - size_t size = sizeof(StringImpl) + length * sizeof(LChar);
|
| -
|
| - WTF_INTERNAL_LEAK_SANITIZER_DISABLED_SCOPE;
|
| - StringImpl* impl = static_cast<StringImpl*>(
|
| - Partitions::bufferMalloc(size, "WTF::StringImpl"));
|
| -
|
| - LChar* data = reinterpret_cast<LChar*>(impl + 1);
|
| - impl = new (impl) StringImpl(length, hash, StaticString);
|
| - memcpy(data, string, length * sizeof(LChar));
|
| -#if DCHECK_IS_ON()
|
| - impl->assertHashIsCorrect();
|
| -#endif
|
| -
|
| - DCHECK(isMainThread());
|
| - m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
|
| - staticStrings().insert(hash, impl);
|
| - WTF_ANNOTATE_BENIGN_RACE(impl,
|
| - "Benign race on the reference counter of a static "
|
| - "string created by StringImpl::createStatic");
|
| -
|
| - return impl;
|
| -}
|
| -
|
| -void StringImpl::reserveStaticStringsCapacityForSize(unsigned size) {
|
| -#if DCHECK_IS_ON()
|
| - DCHECK(s_allowCreationOfStaticStrings);
|
| -#endif
|
| - staticStrings().reserveCapacityForSize(size);
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::create(const UChar* characters,
|
| - unsigned length) {
|
| - if (!characters || !length)
|
| - return empty;
|
| -
|
| - UChar* data;
|
| - RefPtr<StringImpl> string = createUninitialized(length, data);
|
| - memcpy(data, characters, length * sizeof(UChar));
|
| - return string.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::create(const LChar* characters,
|
| - unsigned length) {
|
| - if (!characters || !length)
|
| - return empty;
|
| -
|
| - LChar* data;
|
| - RefPtr<StringImpl> string = createUninitialized(length, data);
|
| - memcpy(data, characters, length * sizeof(LChar));
|
| - return string.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters,
|
| - unsigned length) {
|
| - if (!characters || !length)
|
| - return empty;
|
| -
|
| - LChar* data;
|
| - RefPtr<StringImpl> string = createUninitialized(length, data);
|
| -
|
| - for (size_t i = 0; i < length; ++i) {
|
| - if (characters[i] & 0xff00)
|
| - return create(characters, length);
|
| - data[i] = static_cast<LChar>(characters[i]);
|
| - }
|
| -
|
| - return string.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::create(const LChar* string) {
|
| - if (!string)
|
| - return empty;
|
| - size_t length = strlen(reinterpret_cast<const char*>(string));
|
| - RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
|
| - return create(string, length);
|
| -}
|
| -
|
| -bool StringImpl::containsOnlyWhitespace() {
|
| - // FIXME: The definition of whitespace here includes a number of characters
|
| - // that are not whitespace from the point of view of LayoutText; I wonder if
|
| - // that's a problem in practice.
|
| - if (is8Bit()) {
|
| - for (unsigned i = 0; i < m_length; ++i) {
|
| - UChar c = characters8()[i];
|
| - if (!isASCIISpace(c))
|
| - return false;
|
| - }
|
| -
|
| - return true;
|
| - }
|
| -
|
| - for (unsigned i = 0; i < m_length; ++i) {
|
| - UChar c = characters16()[i];
|
| - if (!isASCIISpace(c))
|
| - return false;
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::substring(unsigned start,
|
| - unsigned length) const {
|
| - if (start >= m_length)
|
| - return empty;
|
| - unsigned maxLength = m_length - start;
|
| - if (length >= maxLength) {
|
| - // PassRefPtr has trouble dealing with const arguments. It should be updated
|
| - // so this const_cast is not necessary.
|
| - if (!start)
|
| - return const_cast<StringImpl*>(this);
|
| - length = maxLength;
|
| - }
|
| - if (is8Bit())
|
| - return create(characters8() + start, length);
|
| -
|
| - return create(characters16() + start, length);
|
| -}
|
| -
|
| -UChar32 StringImpl::characterStartingAt(unsigned i) {
|
| - if (is8Bit())
|
| - return characters8()[i];
|
| - if (U16_IS_SINGLE(characters16()[i]))
|
| - return characters16()[i];
|
| - if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) &&
|
| - U16_IS_TRAIL(characters16()[i + 1]))
|
| - return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
|
| - return 0;
|
| -}
|
| -
|
| -unsigned StringImpl::copyTo(UChar* buffer,
|
| - unsigned start,
|
| - unsigned maxLength) const {
|
| - unsigned numberOfCharactersToCopy = std::min(length() - start, maxLength);
|
| - if (!numberOfCharactersToCopy)
|
| - return 0;
|
| - if (is8Bit())
|
| - copyChars(buffer, characters8() + start, numberOfCharactersToCopy);
|
| - else
|
| - copyChars(buffer, characters16() + start, numberOfCharactersToCopy);
|
| - return numberOfCharactersToCopy;
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::lowerASCII() {
|
| - // First scan the string for uppercase and non-ASCII characters:
|
| - if (is8Bit()) {
|
| - unsigned firstIndexToBeLowered = m_length;
|
| - for (unsigned i = 0; i < m_length; ++i) {
|
| - LChar ch = characters8()[i];
|
| - if (isASCIIUpper(ch)) {
|
| - firstIndexToBeLowered = i;
|
| - break;
|
| - }
|
| - }
|
| -
|
| - // Nothing to do if the string is all ASCII with no uppercase.
|
| - if (firstIndexToBeLowered == m_length) {
|
| - return this;
|
| - }
|
| -
|
| - LChar* data8;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
|
| - memcpy(data8, characters8(), firstIndexToBeLowered);
|
| -
|
| - for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) {
|
| - LChar ch = characters8()[i];
|
| - data8[i] = isASCIIUpper(ch) ? toASCIILower(ch) : ch;
|
| - }
|
| - return newImpl.release();
|
| - }
|
| - bool noUpper = true;
|
| - UChar ored = 0;
|
| -
|
| - const UChar* end = characters16() + m_length;
|
| - for (const UChar* chp = characters16(); chp != end; ++chp) {
|
| - if (isASCIIUpper(*chp))
|
| - noUpper = false;
|
| - ored |= *chp;
|
| - }
|
| - // Nothing to do if the string is all ASCII with no uppercase.
|
| - if (noUpper && !(ored & ~0x7F))
|
| - return this;
|
| -
|
| - RELEASE_ASSERT(m_length <=
|
| - static_cast<unsigned>(numeric_limits<unsigned>::max()));
|
| - unsigned length = m_length;
|
| -
|
| - UChar* data16;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
|
| -
|
| - for (unsigned i = 0; i < length; ++i) {
|
| - UChar c = characters16()[i];
|
| - data16[i] = isASCIIUpper(c) ? toASCIILower(c) : c;
|
| - }
|
| - return newImpl.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::lower() {
|
| - // Note: This is a hot function in the Dromaeo benchmark, specifically the
|
| - // no-op code path up through the first 'return' statement.
|
| -
|
| - // First scan the string for uppercase and non-ASCII characters:
|
| - if (is8Bit()) {
|
| - unsigned firstIndexToBeLowered = m_length;
|
| - for (unsigned i = 0; i < m_length; ++i) {
|
| - LChar ch = characters8()[i];
|
| - if (UNLIKELY(isASCIIUpper(ch) || ch & ~0x7F)) {
|
| - firstIndexToBeLowered = i;
|
| - break;
|
| - }
|
| - }
|
| -
|
| - // Nothing to do if the string is all ASCII with no uppercase.
|
| - if (firstIndexToBeLowered == m_length)
|
| - return this;
|
| -
|
| - LChar* data8;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
|
| - memcpy(data8, characters8(), firstIndexToBeLowered);
|
| -
|
| - for (unsigned i = firstIndexToBeLowered; i < m_length; ++i) {
|
| - LChar ch = characters8()[i];
|
| - data8[i] = UNLIKELY(ch & ~0x7F) ? static_cast<LChar>(Unicode::toLower(ch))
|
| - : toASCIILower(ch);
|
| - }
|
| -
|
| - return newImpl.release();
|
| - }
|
| -
|
| - bool noUpper = true;
|
| - UChar ored = 0;
|
| -
|
| - const UChar* end = characters16() + m_length;
|
| - for (const UChar* chp = characters16(); chp != end; ++chp) {
|
| - if (UNLIKELY(isASCIIUpper(*chp)))
|
| - noUpper = false;
|
| - ored |= *chp;
|
| - }
|
| - // Nothing to do if the string is all ASCII with no uppercase.
|
| - if (noUpper && !(ored & ~0x7F))
|
| - return this;
|
| -
|
| - RELEASE_ASSERT(m_length <=
|
| - static_cast<unsigned>(numeric_limits<int32_t>::max()));
|
| - int32_t length = m_length;
|
| -
|
| - if (!(ored & ~0x7F)) {
|
| - UChar* data16;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
|
| -
|
| - for (int32_t i = 0; i < length; ++i) {
|
| - UChar c = characters16()[i];
|
| - data16[i] = toASCIILower(c);
|
| - }
|
| - return newImpl.release();
|
| - }
|
| -
|
| - // Do a slower implementation for cases that include non-ASCII characters.
|
| - UChar* data16;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
|
| -
|
| - bool error;
|
| - int32_t realLength =
|
| - Unicode::toLower(data16, length, characters16(), m_length, &error);
|
| - if (!error && realLength == length)
|
| - return newImpl.release();
|
| -
|
| - newImpl = createUninitialized(realLength, data16);
|
| - Unicode::toLower(data16, realLength, characters16(), m_length, &error);
|
| - if (error)
|
| - return this;
|
| - return newImpl.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::upper() {
|
| - // This function could be optimized for no-op cases the way lower() is,
|
| - // but in empirical testing, few actual calls to upper() are no-ops, so
|
| - // it wouldn't be worth the extra time for pre-scanning.
|
| -
|
| - RELEASE_ASSERT(m_length <=
|
| - static_cast<unsigned>(numeric_limits<int32_t>::max()));
|
| - int32_t length = m_length;
|
| -
|
| - if (is8Bit()) {
|
| - LChar* data8;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
|
| -
|
| - // Do a faster loop for the case where all the characters are ASCII.
|
| - LChar ored = 0;
|
| - for (int i = 0; i < length; ++i) {
|
| - LChar c = characters8()[i];
|
| - ored |= c;
|
| - data8[i] = toASCIIUpper(c);
|
| - }
|
| - if (!(ored & ~0x7F))
|
| - return newImpl.release();
|
| -
|
| - // Do a slower implementation for cases that include non-ASCII Latin-1
|
| - // characters.
|
| - int numberSharpSCharacters = 0;
|
| -
|
| - // There are two special cases.
|
| - // 1. latin-1 characters when converted to upper case are 16 bit
|
| - // characters.
|
| - // 2. Lower case sharp-S converts to "SS" (two characters)
|
| - for (int32_t i = 0; i < length; ++i) {
|
| - LChar c = characters8()[i];
|
| - if (UNLIKELY(c == smallLetterSharpSCharacter))
|
| - ++numberSharpSCharacters;
|
| - UChar upper = static_cast<UChar>(Unicode::toUpper(c));
|
| - if (UNLIKELY(upper > 0xff)) {
|
| - // Since this upper-cased character does not fit in an 8-bit string, we
|
| - // need to take the 16-bit path.
|
| - goto upconvert;
|
| - }
|
| - data8[i] = static_cast<LChar>(upper);
|
| - }
|
| -
|
| - if (!numberSharpSCharacters)
|
| - return newImpl.release();
|
| -
|
| - // We have numberSSCharacters sharp-s characters, but none of the other
|
| - // special characters.
|
| - newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
|
| -
|
| - LChar* dest = data8;
|
| -
|
| - for (int32_t i = 0; i < length; ++i) {
|
| - LChar c = characters8()[i];
|
| - if (c == smallLetterSharpSCharacter) {
|
| - *dest++ = 'S';
|
| - *dest++ = 'S';
|
| - } else {
|
| - *dest++ = static_cast<LChar>(Unicode::toUpper(c));
|
| - }
|
| - }
|
| -
|
| - return newImpl.release();
|
| - }
|
| -
|
| -upconvert:
|
| - RefPtr<StringImpl> upconverted = upconvertedString();
|
| - const UChar* source16 = upconverted->characters16();
|
| -
|
| - UChar* data16;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
|
| -
|
| - // Do a faster loop for the case where all the characters are ASCII.
|
| - UChar ored = 0;
|
| - for (int i = 0; i < length; ++i) {
|
| - UChar c = source16[i];
|
| - ored |= c;
|
| - data16[i] = toASCIIUpper(c);
|
| - }
|
| - if (!(ored & ~0x7F))
|
| - return newImpl.release();
|
| -
|
| - // Do a slower implementation for cases that include non-ASCII characters.
|
| - bool error;
|
| - int32_t realLength =
|
| - Unicode::toUpper(data16, length, source16, m_length, &error);
|
| - if (!error && realLength == length)
|
| - return newImpl;
|
| - newImpl = createUninitialized(realLength, data16);
|
| - Unicode::toUpper(data16, realLength, source16, m_length, &error);
|
| - if (error)
|
| - return this;
|
| - return newImpl.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::upperASCII() {
|
| - if (is8Bit()) {
|
| - LChar* data8;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
|
| -
|
| - for (unsigned i = 0; i < m_length; ++i) {
|
| - LChar c = characters8()[i];
|
| - data8[i] = isASCIILower(c) ? toASCIIUpper(c) : c;
|
| - }
|
| - return newImpl.release();
|
| - }
|
| -
|
| - UChar* data16;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
|
| -
|
| - for (unsigned i = 0; i < m_length; ++i) {
|
| - UChar c = characters16()[i];
|
| - data16[i] = isASCIILower(c) ? toASCIIUpper(c) : c;
|
| - }
|
| - return newImpl.release();
|
| -}
|
| -
|
| -static inline bool localeIdMatchesLang(const AtomicString& localeId,
|
| - const StringView& lang) {
|
| - RELEASE_ASSERT(lang.length() >= 2 && lang.length() <= 3);
|
| - if (!localeId.impl() || !localeId.impl()->startsWithIgnoringCase(lang))
|
| - return false;
|
| - if (localeId.impl()->length() == lang.length())
|
| - return true;
|
| - const UChar maybeDelimiter = (*localeId.impl())[lang.length()];
|
| - return maybeDelimiter == '-' || maybeDelimiter == '_' ||
|
| - maybeDelimiter == '@';
|
| -}
|
| -
|
| -typedef int32_t (*icuCaseConverter)(UChar*,
|
| - int32_t,
|
| - const UChar*,
|
| - int32_t,
|
| - const char*,
|
| - UErrorCode*);
|
| -
|
| -static PassRefPtr<StringImpl> caseConvert(const UChar* source16,
|
| - size_t length,
|
| - icuCaseConverter converter,
|
| - const char* locale,
|
| - StringImpl* originalString) {
|
| - UChar* data16;
|
| - size_t targetLength = length;
|
| - RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);
|
| - do {
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - targetLength =
|
| - converter(data16, targetLength, source16, length, locale, &status);
|
| - if (U_SUCCESS(status)) {
|
| - if (length > 0)
|
| - return output->substring(0, targetLength);
|
| - return output.release();
|
| - }
|
| - if (status != U_BUFFER_OVERFLOW_ERROR)
|
| - return originalString;
|
| - // Expand the buffer.
|
| - output = StringImpl::createUninitialized(targetLength, data16);
|
| - } while (true);
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) {
|
| - // Use the more optimized code path most of the time.
|
| - // Only Turkic (tr and az) languages and Lithuanian requires
|
| - // locale-specific lowercasing rules. Even though CLDR has el-Lower,
|
| - // it's identical to the locale-agnostic lowercasing. Context-dependent
|
| - // handling of Greek capital sigma is built into the common lowercasing
|
| - // function in ICU.
|
| - const char* localeForConversion = 0;
|
| - if (localeIdMatchesLang(localeIdentifier, "tr") ||
|
| - localeIdMatchesLang(localeIdentifier, "az"))
|
| - localeForConversion = "tr";
|
| - else if (localeIdMatchesLang(localeIdentifier, "lt"))
|
| - localeForConversion = "lt";
|
| - else
|
| - return lower();
|
| -
|
| - if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
|
| - CRASH();
|
| - int length = m_length;
|
| -
|
| - RefPtr<StringImpl> upconverted = upconvertedString();
|
| - const UChar* source16 = upconverted->characters16();
|
| - return caseConvert(source16, length, u_strToLower, localeForConversion, this);
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) {
|
| - // Use the more-optimized code path most of the time.
|
| - // Only Turkic (tr and az) languages, Greek and Lithuanian require
|
| - // locale-specific uppercasing rules.
|
| - const char* localeForConversion = 0;
|
| - if (localeIdMatchesLang(localeIdentifier, "tr") ||
|
| - localeIdMatchesLang(localeIdentifier, "az"))
|
| - localeForConversion = "tr";
|
| - else if (localeIdMatchesLang(localeIdentifier, "el"))
|
| - localeForConversion = "el";
|
| - else if (localeIdMatchesLang(localeIdentifier, "lt"))
|
| - localeForConversion = "lt";
|
| - else
|
| - return upper();
|
| -
|
| - if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
|
| - CRASH();
|
| - int length = m_length;
|
| -
|
| - RefPtr<StringImpl> upconverted = upconvertedString();
|
| - const UChar* source16 = upconverted->characters16();
|
| -
|
| - return caseConvert(source16, length, u_strToUpper, localeForConversion, this);
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::fill(UChar character) {
|
| - if (!(character & ~0x7F)) {
|
| - LChar* data;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
|
| - for (unsigned i = 0; i < m_length; ++i)
|
| - data[i] = static_cast<LChar>(character);
|
| - return newImpl.release();
|
| - }
|
| - UChar* data;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
|
| - for (unsigned i = 0; i < m_length; ++i)
|
| - data[i] = character;
|
| - return newImpl.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::foldCase() {
|
| - RELEASE_ASSERT(m_length <=
|
| - static_cast<unsigned>(numeric_limits<int32_t>::max()));
|
| - int32_t length = m_length;
|
| -
|
| - if (is8Bit()) {
|
| - // Do a faster loop for the case where all the characters are ASCII.
|
| - LChar* data;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
|
| - LChar ored = 0;
|
| -
|
| - for (int32_t i = 0; i < length; ++i) {
|
| - LChar c = characters8()[i];
|
| - data[i] = toASCIILower(c);
|
| - ored |= c;
|
| - }
|
| -
|
| - if (!(ored & ~0x7F))
|
| - return newImpl.release();
|
| -
|
| - // Do a slower implementation for cases that include non-ASCII Latin-1
|
| - // characters.
|
| - for (int32_t i = 0; i < length; ++i)
|
| - data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
|
| -
|
| - return newImpl.release();
|
| - }
|
| -
|
| - // Do a faster loop for the case where all the characters are ASCII.
|
| - UChar* data;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
|
| - UChar ored = 0;
|
| - for (int32_t i = 0; i < length; ++i) {
|
| - UChar c = characters16()[i];
|
| - ored |= c;
|
| - data[i] = toASCIILower(c);
|
| - }
|
| - if (!(ored & ~0x7F))
|
| - return newImpl.release();
|
| -
|
| - // Do a slower implementation for cases that include non-ASCII characters.
|
| - bool error;
|
| - int32_t realLength =
|
| - Unicode::foldCase(data, length, characters16(), m_length, &error);
|
| - if (!error && realLength == length)
|
| - return newImpl.release();
|
| - newImpl = createUninitialized(realLength, data);
|
| - Unicode::foldCase(data, realLength, characters16(), m_length, &error);
|
| - if (error)
|
| - return this;
|
| - return newImpl.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::truncate(unsigned length) {
|
| - if (length >= m_length)
|
| - return this;
|
| - if (is8Bit())
|
| - return create(characters8(), length);
|
| - return create(characters16(), length);
|
| -}
|
| -
|
| -template <class UCharPredicate>
|
| -inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(
|
| - UCharPredicate predicate) {
|
| - if (!m_length)
|
| - return empty;
|
| -
|
| - unsigned start = 0;
|
| - unsigned end = m_length - 1;
|
| -
|
| - // skip white space from start
|
| - while (start <= end &&
|
| - predicate(is8Bit() ? characters8()[start] : characters16()[start]))
|
| - ++start;
|
| -
|
| - // only white space
|
| - if (start > end)
|
| - return empty;
|
| -
|
| - // skip white space from end
|
| - while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
|
| - --end;
|
| -
|
| - if (!start && end == m_length - 1)
|
| - return this;
|
| - if (is8Bit())
|
| - return create(characters8() + start, end + 1 - start);
|
| - return create(characters16() + start, end + 1 - start);
|
| -}
|
| -
|
| -class UCharPredicate final {
|
| - STACK_ALLOCATED();
|
| -
|
| - public:
|
| - inline UCharPredicate(CharacterMatchFunctionPtr function)
|
| - : m_function(function) {}
|
| -
|
| - inline bool operator()(UChar ch) const { return m_function(ch); }
|
| -
|
| - private:
|
| - const CharacterMatchFunctionPtr m_function;
|
| -};
|
| -
|
| -class SpaceOrNewlinePredicate final {
|
| - STACK_ALLOCATED();
|
| -
|
| - public:
|
| - inline bool operator()(UChar ch) const { return isSpaceOrNewline(ch); }
|
| -};
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() {
|
| - return stripMatchedCharacters(SpaceOrNewlinePredicate());
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(
|
| - IsWhiteSpaceFunctionPtr isWhiteSpace) {
|
| - return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
|
| -}
|
| -
|
| -template <typename CharType>
|
| -ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(
|
| - const CharType* characters,
|
| - CharacterMatchFunctionPtr findMatch) {
|
| - const CharType* from = characters;
|
| - const CharType* fromend = from + m_length;
|
| -
|
| - // Assume the common case will not remove any characters
|
| - while (from != fromend && !findMatch(*from))
|
| - ++from;
|
| - if (from == fromend)
|
| - return this;
|
| -
|
| - StringBuffer<CharType> data(m_length);
|
| - CharType* to = data.characters();
|
| - unsigned outc = from - characters;
|
| -
|
| - if (outc)
|
| - memcpy(to, characters, outc * sizeof(CharType));
|
| -
|
| - while (true) {
|
| - while (from != fromend && findMatch(*from))
|
| - ++from;
|
| - while (from != fromend && !findMatch(*from))
|
| - to[outc++] = *from++;
|
| - if (from == fromend)
|
| - break;
|
| - }
|
| -
|
| - data.shrink(outc);
|
| -
|
| - return data.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::removeCharacters(
|
| - CharacterMatchFunctionPtr findMatch) {
|
| - if (is8Bit())
|
| - return removeCharacters(characters8(), findMatch);
|
| - return removeCharacters(characters16(), findMatch);
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::remove(unsigned start,
|
| - unsigned lengthToRemove) {
|
| - if (lengthToRemove <= 0)
|
| - return this;
|
| - if (start >= m_length)
|
| - return this;
|
| -
|
| - lengthToRemove = std::min(m_length - start, lengthToRemove);
|
| - unsigned removedEnd = start + lengthToRemove;
|
| -
|
| - if (is8Bit()) {
|
| - StringBuffer<LChar> buffer(m_length - lengthToRemove);
|
| - copyChars(buffer.characters(), characters8(), start);
|
| - copyChars(buffer.characters() + start, characters8() + removedEnd,
|
| - m_length - removedEnd);
|
| - return buffer.release();
|
| - }
|
| - StringBuffer<UChar> buffer(m_length - lengthToRemove);
|
| - copyChars(buffer.characters(), characters16(), start);
|
| - copyChars(buffer.characters() + start, characters16() + removedEnd,
|
| - m_length - removedEnd);
|
| - return buffer.release();
|
| -}
|
| -
|
| -template <typename CharType, class UCharPredicate>
|
| -inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(
|
| - UCharPredicate predicate,
|
| - StripBehavior stripBehavior) {
|
| - StringBuffer<CharType> data(m_length);
|
| -
|
| - const CharType* from = getCharacters<CharType>();
|
| - const CharType* fromend = from + m_length;
|
| - int outc = 0;
|
| - bool changedToSpace = false;
|
| -
|
| - CharType* to = data.characters();
|
| -
|
| - if (stripBehavior == StripExtraWhiteSpace) {
|
| - while (true) {
|
| - while (from != fromend && predicate(*from)) {
|
| - if (*from != ' ')
|
| - changedToSpace = true;
|
| - ++from;
|
| - }
|
| - while (from != fromend && !predicate(*from))
|
| - to[outc++] = *from++;
|
| - if (from != fromend)
|
| - to[outc++] = ' ';
|
| - else
|
| - break;
|
| - }
|
| -
|
| - if (outc > 0 && to[outc - 1] == ' ')
|
| - --outc;
|
| - } else {
|
| - for (; from != fromend; ++from) {
|
| - if (predicate(*from)) {
|
| - if (*from != ' ')
|
| - changedToSpace = true;
|
| - to[outc++] = ' ';
|
| - } else {
|
| - to[outc++] = *from;
|
| - }
|
| - }
|
| - }
|
| -
|
| - if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
|
| - return this;
|
| -
|
| - data.shrink(outc);
|
| -
|
| - return data.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(
|
| - StripBehavior stripBehavior) {
|
| - if (is8Bit())
|
| - return StringImpl::simplifyMatchedCharactersToSpace<LChar>(
|
| - SpaceOrNewlinePredicate(), stripBehavior);
|
| - return StringImpl::simplifyMatchedCharactersToSpace<UChar>(
|
| - SpaceOrNewlinePredicate(), stripBehavior);
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(
|
| - IsWhiteSpaceFunctionPtr isWhiteSpace,
|
| - StripBehavior stripBehavior) {
|
| - if (is8Bit())
|
| - return StringImpl::simplifyMatchedCharactersToSpace<LChar>(
|
| - UCharPredicate(isWhiteSpace), stripBehavior);
|
| - return StringImpl::simplifyMatchedCharactersToSpace<UChar>(
|
| - UCharPredicate(isWhiteSpace), stripBehavior);
|
| -}
|
| -
|
| -int StringImpl::toIntStrict(bool* ok, int base) {
|
| - if (is8Bit())
|
| - return charactersToIntStrict(characters8(), m_length, ok, base);
|
| - return charactersToIntStrict(characters16(), m_length, ok, base);
|
| -}
|
| -
|
| -unsigned StringImpl::toUIntStrict(bool* ok, int base) {
|
| - if (is8Bit())
|
| - return charactersToUIntStrict(characters8(), m_length, ok, base);
|
| - return charactersToUIntStrict(characters16(), m_length, ok, base);
|
| -}
|
| -
|
| -int64_t StringImpl::toInt64Strict(bool* ok, int base) {
|
| - if (is8Bit())
|
| - return charactersToInt64Strict(characters8(), m_length, ok, base);
|
| - return charactersToInt64Strict(characters16(), m_length, ok, base);
|
| -}
|
| -
|
| -uint64_t StringImpl::toUInt64Strict(bool* ok, int base) {
|
| - if (is8Bit())
|
| - return charactersToUInt64Strict(characters8(), m_length, ok, base);
|
| - return charactersToUInt64Strict(characters16(), m_length, ok, base);
|
| -}
|
| -
|
| -int StringImpl::toInt(bool* ok) {
|
| - if (is8Bit())
|
| - return charactersToInt(characters8(), m_length, ok);
|
| - return charactersToInt(characters16(), m_length, ok);
|
| -}
|
| -
|
| -unsigned StringImpl::toUInt(bool* ok) {
|
| - if (is8Bit())
|
| - return charactersToUInt(characters8(), m_length, ok);
|
| - return charactersToUInt(characters16(), m_length, ok);
|
| -}
|
| -
|
| -int64_t StringImpl::toInt64(bool* ok) {
|
| - if (is8Bit())
|
| - return charactersToInt64(characters8(), m_length, ok);
|
| - return charactersToInt64(characters16(), m_length, ok);
|
| -}
|
| -
|
| -uint64_t StringImpl::toUInt64(bool* ok) {
|
| - if (is8Bit())
|
| - return charactersToUInt64(characters8(), m_length, ok);
|
| - return charactersToUInt64(characters16(), m_length, ok);
|
| -}
|
| -
|
| -double StringImpl::toDouble(bool* ok) {
|
| - if (is8Bit())
|
| - return charactersToDouble(characters8(), m_length, ok);
|
| - return charactersToDouble(characters16(), m_length, ok);
|
| -}
|
| -
|
| -float StringImpl::toFloat(bool* ok) {
|
| - if (is8Bit())
|
| - return charactersToFloat(characters8(), m_length, ok);
|
| - return charactersToFloat(characters16(), m_length, ok);
|
| -}
|
| -
|
| -// Table is based on ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt
|
| -const UChar StringImpl::latin1CaseFoldTable[256] = {
|
| - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
|
| - 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011,
|
| - 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a,
|
| - 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
|
| - 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c,
|
| - 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035,
|
| - 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e,
|
| - 0x003f, 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
| - 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070,
|
| - 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079,
|
| - 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062,
|
| - 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b,
|
| - 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074,
|
| - 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d,
|
| - 0x007e, 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086,
|
| - 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
|
| - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098,
|
| - 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1,
|
| - 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa,
|
| - 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3,
|
| - 0x00b4, 0x03bc, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc,
|
| - 0x00bd, 0x00be, 0x00bf, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5,
|
| - 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee,
|
| - 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00d7,
|
| - 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00df, 0x00e0,
|
| - 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9,
|
| - 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2,
|
| - 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb,
|
| - 0x00fc, 0x00fd, 0x00fe, 0x00ff,
|
| -};
|
| -
|
| -bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) {
|
| - DCHECK_GE(length, 0u);
|
| - if (a == b)
|
| - return true;
|
| - while (length--) {
|
| - if (StringImpl::latin1CaseFoldTable[*a++] !=
|
| - StringImpl::latin1CaseFoldTable[*b++])
|
| - return false;
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -bool equalIgnoringCase(const UChar* a, const UChar* b, unsigned length) {
|
| - DCHECK_GE(length, 0u);
|
| - if (a == b)
|
| - return true;
|
| - return !Unicode::umemcasecmp(a, b, length);
|
| -}
|
| -
|
| -bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) {
|
| - while (length--) {
|
| - if (foldCase(*a++) != StringImpl::latin1CaseFoldTable[*b++])
|
| - return false;
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction,
|
| - unsigned start) {
|
| - if (is8Bit())
|
| - return WTF::find(characters8(), m_length, matchFunction, start);
|
| - return WTF::find(characters16(), m_length, matchFunction, start);
|
| -}
|
| -
|
| -template <typename SearchCharacterType, typename MatchCharacterType>
|
| -ALWAYS_INLINE static size_t findInternal(
|
| - const SearchCharacterType* searchCharacters,
|
| - const MatchCharacterType* matchCharacters,
|
| - unsigned index,
|
| - unsigned searchLength,
|
| - unsigned matchLength) {
|
| - // Optimization: keep a running hash of the strings,
|
| - // only call equal() if the hashes match.
|
| -
|
| - // delta is the number of additional times to test; delta == 0 means test only
|
| - // once.
|
| - unsigned delta = searchLength - matchLength;
|
| -
|
| - unsigned searchHash = 0;
|
| - unsigned matchHash = 0;
|
| -
|
| - for (unsigned i = 0; i < matchLength; ++i) {
|
| - searchHash += searchCharacters[i];
|
| - matchHash += matchCharacters[i];
|
| - }
|
| -
|
| - unsigned i = 0;
|
| - // keep looping until we match
|
| - while (searchHash != matchHash ||
|
| - !equal(searchCharacters + i, matchCharacters, matchLength)) {
|
| - if (i == delta)
|
| - return kNotFound;
|
| - searchHash += searchCharacters[i + matchLength];
|
| - searchHash -= searchCharacters[i];
|
| - ++i;
|
| - }
|
| - return index + i;
|
| -}
|
| -
|
| -size_t StringImpl::find(const StringView& matchString, unsigned index) {
|
| - if (UNLIKELY(matchString.isNull()))
|
| - return kNotFound;
|
| -
|
| - unsigned matchLength = matchString.length();
|
| -
|
| - // Optimization 1: fast case for strings of length 1.
|
| - if (matchLength == 1) {
|
| - if (is8Bit())
|
| - return WTF::find(characters8(), length(), matchString[0], index);
|
| - return WTF::find(characters16(), length(), matchString[0], index);
|
| - }
|
| -
|
| - if (UNLIKELY(!matchLength))
|
| - return min(index, length());
|
| -
|
| - // Check index & matchLength are in range.
|
| - if (index > length())
|
| - return kNotFound;
|
| - unsigned searchLength = length() - index;
|
| - if (matchLength > searchLength)
|
| - return kNotFound;
|
| -
|
| - if (is8Bit()) {
|
| - if (matchString.is8Bit())
|
| - return findInternal(characters8() + index, matchString.characters8(),
|
| - index, searchLength, matchLength);
|
| - return findInternal(characters8() + index, matchString.characters16(),
|
| - index, searchLength, matchLength);
|
| - }
|
| - if (matchString.is8Bit())
|
| - return findInternal(characters16() + index, matchString.characters8(),
|
| - index, searchLength, matchLength);
|
| - return findInternal(characters16() + index, matchString.characters16(), index,
|
| - searchLength, matchLength);
|
| -}
|
| -
|
| -template <typename SearchCharacterType, typename MatchCharacterType>
|
| -ALWAYS_INLINE static size_t findIgnoringCaseInternal(
|
| - const SearchCharacterType* searchCharacters,
|
| - const MatchCharacterType* matchCharacters,
|
| - unsigned index,
|
| - unsigned searchLength,
|
| - unsigned matchLength) {
|
| - // delta is the number of additional times to test; delta == 0 means test only
|
| - // once.
|
| - unsigned delta = searchLength - matchLength;
|
| -
|
| - unsigned i = 0;
|
| - // keep looping until we match
|
| - while (
|
| - !equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
|
| - if (i == delta)
|
| - return kNotFound;
|
| - ++i;
|
| - }
|
| - return index + i;
|
| -}
|
| -
|
| -size_t StringImpl::findIgnoringCase(const StringView& matchString,
|
| - unsigned index) {
|
| - if (UNLIKELY(matchString.isNull()))
|
| - return kNotFound;
|
| -
|
| - unsigned matchLength = matchString.length();
|
| - if (!matchLength)
|
| - return min(index, length());
|
| -
|
| - // Check index & matchLength are in range.
|
| - if (index > length())
|
| - return kNotFound;
|
| - unsigned searchLength = length() - index;
|
| - if (matchLength > searchLength)
|
| - return kNotFound;
|
| -
|
| - if (is8Bit()) {
|
| - if (matchString.is8Bit())
|
| - return findIgnoringCaseInternal(characters8() + index,
|
| - matchString.characters8(), index,
|
| - searchLength, matchLength);
|
| - return findIgnoringCaseInternal(characters8() + index,
|
| - matchString.characters16(), index,
|
| - searchLength, matchLength);
|
| - }
|
| - if (matchString.is8Bit())
|
| - return findIgnoringCaseInternal(characters16() + index,
|
| - matchString.characters8(), index,
|
| - searchLength, matchLength);
|
| - return findIgnoringCaseInternal(characters16() + index,
|
| - matchString.characters16(), index,
|
| - searchLength, matchLength);
|
| -}
|
| -
|
| -template <typename SearchCharacterType, typename MatchCharacterType>
|
| -ALWAYS_INLINE static size_t findIgnoringASCIICaseInternal(
|
| - const SearchCharacterType* searchCharacters,
|
| - const MatchCharacterType* matchCharacters,
|
| - unsigned index,
|
| - unsigned searchLength,
|
| - unsigned matchLength) {
|
| - // delta is the number of additional times to test; delta == 0 means test only
|
| - // once.
|
| - unsigned delta = searchLength - matchLength;
|
| -
|
| - unsigned i = 0;
|
| - // keep looping until we match
|
| - while (!equalIgnoringASCIICase(searchCharacters + i, matchCharacters,
|
| - matchLength)) {
|
| - if (i == delta)
|
| - return kNotFound;
|
| - ++i;
|
| - }
|
| - return index + i;
|
| -}
|
| -
|
| -size_t StringImpl::findIgnoringASCIICase(const StringView& matchString,
|
| - unsigned index) {
|
| - if (UNLIKELY(matchString.isNull()))
|
| - return kNotFound;
|
| -
|
| - unsigned matchLength = matchString.length();
|
| - if (!matchLength)
|
| - return min(index, length());
|
| -
|
| - // Check index & matchLength are in range.
|
| - if (index > length())
|
| - return kNotFound;
|
| - unsigned searchLength = length() - index;
|
| - if (matchLength > searchLength)
|
| - return kNotFound;
|
| -
|
| - if (is8Bit()) {
|
| - if (matchString.is8Bit())
|
| - return findIgnoringASCIICaseInternal(characters8() + index,
|
| - matchString.characters8(), index,
|
| - searchLength, matchLength);
|
| - return findIgnoringASCIICaseInternal(characters8() + index,
|
| - matchString.characters16(), index,
|
| - searchLength, matchLength);
|
| - }
|
| - if (matchString.is8Bit())
|
| - return findIgnoringASCIICaseInternal(characters16() + index,
|
| - matchString.characters8(), index,
|
| - searchLength, matchLength);
|
| - return findIgnoringASCIICaseInternal(characters16() + index,
|
| - matchString.characters16(), index,
|
| - searchLength, matchLength);
|
| -}
|
| -
|
| -size_t StringImpl::reverseFind(UChar c, unsigned index) {
|
| - if (is8Bit())
|
| - return WTF::reverseFind(characters8(), m_length, c, index);
|
| - return WTF::reverseFind(characters16(), m_length, c, index);
|
| -}
|
| -
|
| -template <typename SearchCharacterType, typename MatchCharacterType>
|
| -ALWAYS_INLINE static size_t reverseFindInternal(
|
| - const SearchCharacterType* searchCharacters,
|
| - const MatchCharacterType* matchCharacters,
|
| - unsigned index,
|
| - unsigned length,
|
| - unsigned matchLength) {
|
| - // Optimization: keep a running hash of the strings,
|
| - // only call equal if the hashes match.
|
| -
|
| - // delta is the number of additional times to test; delta == 0 means test only
|
| - // once.
|
| - unsigned delta = min(index, length - matchLength);
|
| -
|
| - unsigned searchHash = 0;
|
| - unsigned matchHash = 0;
|
| - for (unsigned i = 0; i < matchLength; ++i) {
|
| - searchHash += searchCharacters[delta + i];
|
| - matchHash += matchCharacters[i];
|
| - }
|
| -
|
| - // keep looping until we match
|
| - while (searchHash != matchHash ||
|
| - !equal(searchCharacters + delta, matchCharacters, matchLength)) {
|
| - if (!delta)
|
| - return kNotFound;
|
| - --delta;
|
| - searchHash -= searchCharacters[delta + matchLength];
|
| - searchHash += searchCharacters[delta];
|
| - }
|
| - return delta;
|
| -}
|
| -
|
| -size_t StringImpl::reverseFind(const StringView& matchString, unsigned index) {
|
| - if (UNLIKELY(matchString.isNull()))
|
| - return kNotFound;
|
| -
|
| - unsigned matchLength = matchString.length();
|
| - unsigned ourLength = length();
|
| - if (!matchLength)
|
| - return min(index, ourLength);
|
| -
|
| - // Optimization 1: fast case for strings of length 1.
|
| - if (matchLength == 1) {
|
| - if (is8Bit())
|
| - return WTF::reverseFind(characters8(), ourLength, matchString[0], index);
|
| - return WTF::reverseFind(characters16(), ourLength, matchString[0], index);
|
| - }
|
| -
|
| - // Check index & matchLength are in range.
|
| - if (matchLength > ourLength)
|
| - return kNotFound;
|
| -
|
| - if (is8Bit()) {
|
| - if (matchString.is8Bit())
|
| - return reverseFindInternal(characters8(), matchString.characters8(),
|
| - index, ourLength, matchLength);
|
| - return reverseFindInternal(characters8(), matchString.characters16(), index,
|
| - ourLength, matchLength);
|
| - }
|
| - if (matchString.is8Bit())
|
| - return reverseFindInternal(characters16(), matchString.characters8(), index,
|
| - ourLength, matchLength);
|
| - return reverseFindInternal(characters16(), matchString.characters16(), index,
|
| - ourLength, matchLength);
|
| -}
|
| -
|
| -bool StringImpl::startsWith(UChar character) const {
|
| - return m_length && (*this)[0] == character;
|
| -}
|
| -
|
| -bool StringImpl::startsWith(const StringView& prefix) const {
|
| - if (prefix.length() > length())
|
| - return false;
|
| - if (is8Bit()) {
|
| - if (prefix.is8Bit())
|
| - return equal(characters8(), prefix.characters8(), prefix.length());
|
| - return equal(characters8(), prefix.characters16(), prefix.length());
|
| - }
|
| - if (prefix.is8Bit())
|
| - return equal(characters16(), prefix.characters8(), prefix.length());
|
| - return equal(characters16(), prefix.characters16(), prefix.length());
|
| -}
|
| -
|
| -bool StringImpl::startsWithIgnoringCase(const StringView& prefix) const {
|
| - if (prefix.length() > length())
|
| - return false;
|
| - if (is8Bit()) {
|
| - if (prefix.is8Bit())
|
| - return equalIgnoringCase(characters8(), prefix.characters8(),
|
| - prefix.length());
|
| - return equalIgnoringCase(characters8(), prefix.characters16(),
|
| - prefix.length());
|
| - }
|
| - if (prefix.is8Bit())
|
| - return equalIgnoringCase(characters16(), prefix.characters8(),
|
| - prefix.length());
|
| - return equalIgnoringCase(characters16(), prefix.characters16(),
|
| - prefix.length());
|
| -}
|
| -
|
| -bool StringImpl::startsWithIgnoringASCIICase(const StringView& prefix) const {
|
| - if (prefix.length() > length())
|
| - return false;
|
| - if (is8Bit()) {
|
| - if (prefix.is8Bit())
|
| - return equalIgnoringASCIICase(characters8(), prefix.characters8(),
|
| - prefix.length());
|
| - return equalIgnoringASCIICase(characters8(), prefix.characters16(),
|
| - prefix.length());
|
| - }
|
| - if (prefix.is8Bit())
|
| - return equalIgnoringASCIICase(characters16(), prefix.characters8(),
|
| - prefix.length());
|
| - return equalIgnoringASCIICase(characters16(), prefix.characters16(),
|
| - prefix.length());
|
| -}
|
| -
|
| -bool StringImpl::endsWith(UChar character) const {
|
| - return m_length && (*this)[m_length - 1] == character;
|
| -}
|
| -
|
| -bool StringImpl::endsWith(const StringView& suffix) const {
|
| - if (suffix.length() > length())
|
| - return false;
|
| - unsigned startOffset = length() - suffix.length();
|
| - if (is8Bit()) {
|
| - if (suffix.is8Bit())
|
| - return equal(characters8() + startOffset, suffix.characters8(),
|
| - suffix.length());
|
| - return equal(characters8() + startOffset, suffix.characters16(),
|
| - suffix.length());
|
| - }
|
| - if (suffix.is8Bit())
|
| - return equal(characters16() + startOffset, suffix.characters8(),
|
| - suffix.length());
|
| - return equal(characters16() + startOffset, suffix.characters16(),
|
| - suffix.length());
|
| -}
|
| -
|
| -bool StringImpl::endsWithIgnoringCase(const StringView& suffix) const {
|
| - if (suffix.length() > length())
|
| - return false;
|
| - unsigned startOffset = length() - suffix.length();
|
| - if (is8Bit()) {
|
| - if (suffix.is8Bit())
|
| - return equalIgnoringCase(characters8() + startOffset,
|
| - suffix.characters8(), suffix.length());
|
| - return equalIgnoringCase(characters8() + startOffset, suffix.characters16(),
|
| - suffix.length());
|
| - }
|
| - if (suffix.is8Bit())
|
| - return equalIgnoringCase(characters16() + startOffset, suffix.characters8(),
|
| - suffix.length());
|
| - return equalIgnoringCase(characters16() + startOffset, suffix.characters16(),
|
| - suffix.length());
|
| -}
|
| -
|
| -bool StringImpl::endsWithIgnoringASCIICase(const StringView& suffix) const {
|
| - if (suffix.length() > length())
|
| - return false;
|
| - unsigned startOffset = length() - suffix.length();
|
| - if (is8Bit()) {
|
| - if (suffix.is8Bit())
|
| - return equalIgnoringASCIICase(characters8() + startOffset,
|
| - suffix.characters8(), suffix.length());
|
| - return equalIgnoringASCIICase(characters8() + startOffset,
|
| - suffix.characters16(), suffix.length());
|
| - }
|
| - if (suffix.is8Bit())
|
| - return equalIgnoringASCIICase(characters16() + startOffset,
|
| - suffix.characters8(), suffix.length());
|
| - return equalIgnoringASCIICase(characters16() + startOffset,
|
| - suffix.characters16(), suffix.length());
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) {
|
| - if (oldC == newC)
|
| - return this;
|
| -
|
| - if (find(oldC) == kNotFound)
|
| - return this;
|
| -
|
| - unsigned i;
|
| - if (is8Bit()) {
|
| - if (newC <= 0xff) {
|
| - LChar* data;
|
| - LChar oldChar = static_cast<LChar>(oldC);
|
| - LChar newChar = static_cast<LChar>(newC);
|
| -
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
|
| -
|
| - for (i = 0; i != m_length; ++i) {
|
| - LChar ch = characters8()[i];
|
| - if (ch == oldChar)
|
| - ch = newChar;
|
| - data[i] = ch;
|
| - }
|
| - return newImpl.release();
|
| - }
|
| -
|
| - // There is the possibility we need to up convert from 8 to 16 bit,
|
| - // create a 16 bit string for the result.
|
| - UChar* data;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
|
| -
|
| - for (i = 0; i != m_length; ++i) {
|
| - UChar ch = characters8()[i];
|
| - if (ch == oldC)
|
| - ch = newC;
|
| - data[i] = ch;
|
| - }
|
| -
|
| - return newImpl.release();
|
| - }
|
| -
|
| - UChar* data;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
|
| -
|
| - for (i = 0; i != m_length; ++i) {
|
| - UChar ch = characters16()[i];
|
| - if (ch == oldC)
|
| - ch = newC;
|
| - data[i] = ch;
|
| - }
|
| - return newImpl.release();
|
| -}
|
| -
|
| -// TODO(esprehn): Passing a null replacement is the same as empty string for
|
| -// this method but all others treat null as a no-op. We should choose one
|
| -// behavior.
|
| -PassRefPtr<StringImpl> StringImpl::replace(unsigned position,
|
| - unsigned lengthToReplace,
|
| - const StringView& string) {
|
| - position = min(position, length());
|
| - lengthToReplace = min(lengthToReplace, length() - position);
|
| - unsigned lengthToInsert = string.length();
|
| - if (!lengthToReplace && !lengthToInsert)
|
| - return this;
|
| -
|
| - RELEASE_ASSERT((length() - lengthToReplace) <
|
| - (numeric_limits<unsigned>::max() - lengthToInsert));
|
| -
|
| - if (is8Bit() && (string.isNull() || string.is8Bit())) {
|
| - LChar* data;
|
| - RefPtr<StringImpl> newImpl =
|
| - createUninitialized(length() - lengthToReplace + lengthToInsert, data);
|
| - memcpy(data, characters8(), position * sizeof(LChar));
|
| - if (!string.isNull())
|
| - memcpy(data + position, string.characters8(),
|
| - lengthToInsert * sizeof(LChar));
|
| - memcpy(data + position + lengthToInsert,
|
| - characters8() + position + lengthToReplace,
|
| - (length() - position - lengthToReplace) * sizeof(LChar));
|
| - return newImpl.release();
|
| - }
|
| - UChar* data;
|
| - RefPtr<StringImpl> newImpl =
|
| - createUninitialized(length() - lengthToReplace + lengthToInsert, data);
|
| - if (is8Bit())
|
| - for (unsigned i = 0; i < position; ++i)
|
| - data[i] = characters8()[i];
|
| - else
|
| - memcpy(data, characters16(), position * sizeof(UChar));
|
| - if (!string.isNull()) {
|
| - if (string.is8Bit())
|
| - for (unsigned i = 0; i < lengthToInsert; ++i)
|
| - data[i + position] = string.characters8()[i];
|
| - else
|
| - memcpy(data + position, string.characters16(),
|
| - lengthToInsert * sizeof(UChar));
|
| - }
|
| - if (is8Bit()) {
|
| - for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
|
| - data[i + position + lengthToInsert] =
|
| - characters8()[i + position + lengthToReplace];
|
| - } else {
|
| - memcpy(data + position + lengthToInsert,
|
| - characters16() + position + lengthToReplace,
|
| - (length() - position - lengthToReplace) * sizeof(UChar));
|
| - }
|
| - return newImpl.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::replace(UChar pattern,
|
| - const StringView& replacement) {
|
| - if (replacement.isNull())
|
| - return this;
|
| - if (replacement.is8Bit())
|
| - return replace(pattern, replacement.characters8(), replacement.length());
|
| - return replace(pattern, replacement.characters16(), replacement.length());
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::replace(UChar pattern,
|
| - const LChar* replacement,
|
| - unsigned repStrLength) {
|
| - DCHECK(replacement);
|
| -
|
| - size_t srcSegmentStart = 0;
|
| - unsigned matchCount = 0;
|
| -
|
| - // Count the matches.
|
| - while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
|
| - ++matchCount;
|
| - ++srcSegmentStart;
|
| - }
|
| -
|
| - // If we have 0 matches then we don't have to do any more work.
|
| - if (!matchCount)
|
| - return this;
|
| -
|
| - RELEASE_ASSERT(!repStrLength ||
|
| - matchCount <= numeric_limits<unsigned>::max() / repStrLength);
|
| -
|
| - unsigned replaceSize = matchCount * repStrLength;
|
| - unsigned newSize = m_length - matchCount;
|
| - RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
|
| -
|
| - newSize += replaceSize;
|
| -
|
| - // Construct the new data.
|
| - size_t srcSegmentEnd;
|
| - unsigned srcSegmentLength;
|
| - srcSegmentStart = 0;
|
| - unsigned dstOffset = 0;
|
| -
|
| - if (is8Bit()) {
|
| - LChar* data;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
|
| -
|
| - while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
|
| - srcSegmentLength = srcSegmentEnd - srcSegmentStart;
|
| - memcpy(data + dstOffset, characters8() + srcSegmentStart,
|
| - srcSegmentLength * sizeof(LChar));
|
| - dstOffset += srcSegmentLength;
|
| - memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
|
| - dstOffset += repStrLength;
|
| - srcSegmentStart = srcSegmentEnd + 1;
|
| - }
|
| -
|
| - srcSegmentLength = m_length - srcSegmentStart;
|
| - memcpy(data + dstOffset, characters8() + srcSegmentStart,
|
| - srcSegmentLength * sizeof(LChar));
|
| -
|
| - DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());
|
| -
|
| - return newImpl.release();
|
| - }
|
| -
|
| - UChar* data;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
|
| -
|
| - while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
|
| - srcSegmentLength = srcSegmentEnd - srcSegmentStart;
|
| - memcpy(data + dstOffset, characters16() + srcSegmentStart,
|
| - srcSegmentLength * sizeof(UChar));
|
| -
|
| - dstOffset += srcSegmentLength;
|
| - for (unsigned i = 0; i < repStrLength; ++i)
|
| - data[i + dstOffset] = replacement[i];
|
| -
|
| - dstOffset += repStrLength;
|
| - srcSegmentStart = srcSegmentEnd + 1;
|
| - }
|
| -
|
| - srcSegmentLength = m_length - srcSegmentStart;
|
| - memcpy(data + dstOffset, characters16() + srcSegmentStart,
|
| - srcSegmentLength * sizeof(UChar));
|
| -
|
| - DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());
|
| -
|
| - return newImpl.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::replace(UChar pattern,
|
| - const UChar* replacement,
|
| - unsigned repStrLength) {
|
| - DCHECK(replacement);
|
| -
|
| - size_t srcSegmentStart = 0;
|
| - unsigned matchCount = 0;
|
| -
|
| - // Count the matches.
|
| - while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
|
| - ++matchCount;
|
| - ++srcSegmentStart;
|
| - }
|
| -
|
| - // If we have 0 matches then we don't have to do any more work.
|
| - if (!matchCount)
|
| - return this;
|
| -
|
| - RELEASE_ASSERT(!repStrLength ||
|
| - matchCount <= numeric_limits<unsigned>::max() / repStrLength);
|
| -
|
| - unsigned replaceSize = matchCount * repStrLength;
|
| - unsigned newSize = m_length - matchCount;
|
| - RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
|
| -
|
| - newSize += replaceSize;
|
| -
|
| - // Construct the new data.
|
| - size_t srcSegmentEnd;
|
| - unsigned srcSegmentLength;
|
| - srcSegmentStart = 0;
|
| - unsigned dstOffset = 0;
|
| -
|
| - if (is8Bit()) {
|
| - UChar* data;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
|
| -
|
| - while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
|
| - srcSegmentLength = srcSegmentEnd - srcSegmentStart;
|
| - for (unsigned i = 0; i < srcSegmentLength; ++i)
|
| - data[i + dstOffset] = characters8()[i + srcSegmentStart];
|
| -
|
| - dstOffset += srcSegmentLength;
|
| - memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
|
| -
|
| - dstOffset += repStrLength;
|
| - srcSegmentStart = srcSegmentEnd + 1;
|
| - }
|
| -
|
| - srcSegmentLength = m_length - srcSegmentStart;
|
| - for (unsigned i = 0; i < srcSegmentLength; ++i)
|
| - data[i + dstOffset] = characters8()[i + srcSegmentStart];
|
| -
|
| - DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());
|
| -
|
| - return newImpl.release();
|
| - }
|
| -
|
| - UChar* data;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
|
| -
|
| - while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
|
| - srcSegmentLength = srcSegmentEnd - srcSegmentStart;
|
| - memcpy(data + dstOffset, characters16() + srcSegmentStart,
|
| - srcSegmentLength * sizeof(UChar));
|
| -
|
| - dstOffset += srcSegmentLength;
|
| - memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
|
| -
|
| - dstOffset += repStrLength;
|
| - srcSegmentStart = srcSegmentEnd + 1;
|
| - }
|
| -
|
| - srcSegmentLength = m_length - srcSegmentStart;
|
| - memcpy(data + dstOffset, characters16() + srcSegmentStart,
|
| - srcSegmentLength * sizeof(UChar));
|
| -
|
| - DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());
|
| -
|
| - return newImpl.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::replace(const StringView& pattern,
|
| - const StringView& replacement) {
|
| - if (pattern.isNull() || replacement.isNull())
|
| - return this;
|
| -
|
| - unsigned patternLength = pattern.length();
|
| - if (!patternLength)
|
| - return this;
|
| -
|
| - unsigned repStrLength = replacement.length();
|
| - size_t srcSegmentStart = 0;
|
| - unsigned matchCount = 0;
|
| -
|
| - // Count the matches.
|
| - while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
|
| - ++matchCount;
|
| - srcSegmentStart += patternLength;
|
| - }
|
| -
|
| - // If we have 0 matches, we don't have to do any more work
|
| - if (!matchCount)
|
| - return this;
|
| -
|
| - unsigned newSize = m_length - matchCount * patternLength;
|
| - RELEASE_ASSERT(!repStrLength ||
|
| - matchCount <= numeric_limits<unsigned>::max() / repStrLength);
|
| -
|
| - RELEASE_ASSERT(newSize <=
|
| - (numeric_limits<unsigned>::max() - matchCount * repStrLength));
|
| -
|
| - newSize += matchCount * repStrLength;
|
| -
|
| - // Construct the new data
|
| - size_t srcSegmentEnd;
|
| - unsigned srcSegmentLength;
|
| - srcSegmentStart = 0;
|
| - unsigned dstOffset = 0;
|
| - bool srcIs8Bit = is8Bit();
|
| - bool replacementIs8Bit = replacement.is8Bit();
|
| -
|
| - // There are 4 cases:
|
| - // 1. This and replacement are both 8 bit.
|
| - // 2. This and replacement are both 16 bit.
|
| - // 3. This is 8 bit and replacement is 16 bit.
|
| - // 4. This is 16 bit and replacement is 8 bit.
|
| - if (srcIs8Bit && replacementIs8Bit) {
|
| - // Case 1
|
| - LChar* data;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
|
| - while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
|
| - srcSegmentLength = srcSegmentEnd - srcSegmentStart;
|
| - memcpy(data + dstOffset, characters8() + srcSegmentStart,
|
| - srcSegmentLength * sizeof(LChar));
|
| - dstOffset += srcSegmentLength;
|
| - memcpy(data + dstOffset, replacement.characters8(),
|
| - repStrLength * sizeof(LChar));
|
| - dstOffset += repStrLength;
|
| - srcSegmentStart = srcSegmentEnd + patternLength;
|
| - }
|
| -
|
| - srcSegmentLength = m_length - srcSegmentStart;
|
| - memcpy(data + dstOffset, characters8() + srcSegmentStart,
|
| - srcSegmentLength * sizeof(LChar));
|
| -
|
| - DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());
|
| -
|
| - return newImpl.release();
|
| - }
|
| -
|
| - UChar* data;
|
| - RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
|
| - while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
|
| - srcSegmentLength = srcSegmentEnd - srcSegmentStart;
|
| - if (srcIs8Bit) {
|
| - // Case 3.
|
| - for (unsigned i = 0; i < srcSegmentLength; ++i)
|
| - data[i + dstOffset] = characters8()[i + srcSegmentStart];
|
| - } else {
|
| - // Case 2 & 4.
|
| - memcpy(data + dstOffset, characters16() + srcSegmentStart,
|
| - srcSegmentLength * sizeof(UChar));
|
| - }
|
| - dstOffset += srcSegmentLength;
|
| - if (replacementIs8Bit) {
|
| - // Cases 2 & 3.
|
| - for (unsigned i = 0; i < repStrLength; ++i)
|
| - data[i + dstOffset] = replacement.characters8()[i];
|
| - } else {
|
| - // Case 4
|
| - memcpy(data + dstOffset, replacement.characters16(),
|
| - repStrLength * sizeof(UChar));
|
| - }
|
| - dstOffset += repStrLength;
|
| - srcSegmentStart = srcSegmentEnd + patternLength;
|
| - }
|
| -
|
| - srcSegmentLength = m_length - srcSegmentStart;
|
| - if (srcIs8Bit) {
|
| - // Case 3.
|
| - for (unsigned i = 0; i < srcSegmentLength; ++i)
|
| - data[i + dstOffset] = characters8()[i + srcSegmentStart];
|
| - } else {
|
| - // Cases 2 & 4.
|
| - memcpy(data + dstOffset, characters16() + srcSegmentStart,
|
| - srcSegmentLength * sizeof(UChar));
|
| - }
|
| -
|
| - DCHECK_EQ(dstOffset + srcSegmentLength, newImpl->length());
|
| -
|
| - return newImpl.release();
|
| -}
|
| -
|
| -PassRefPtr<StringImpl> StringImpl::upconvertedString() {
|
| - if (is8Bit())
|
| - return String::make16BitFrom8BitSource(characters8(), m_length)
|
| - .releaseImpl();
|
| - return this;
|
| -}
|
| -
|
| -static inline bool stringImplContentEqual(const StringImpl* a,
|
| - const StringImpl* b) {
|
| - unsigned aLength = a->length();
|
| - unsigned bLength = b->length();
|
| - if (aLength != bLength)
|
| - return false;
|
| -
|
| - if (a->is8Bit()) {
|
| - if (b->is8Bit())
|
| - return equal(a->characters8(), b->characters8(), aLength);
|
| -
|
| - return equal(a->characters8(), b->characters16(), aLength);
|
| - }
|
| -
|
| - if (b->is8Bit())
|
| - return equal(a->characters16(), b->characters8(), aLength);
|
| -
|
| - return equal(a->characters16(), b->characters16(), aLength);
|
| -}
|
| -
|
| -bool equal(const StringImpl* a, const StringImpl* b) {
|
| - if (a == b)
|
| - return true;
|
| - if (!a || !b)
|
| - return false;
|
| - if (a->isAtomic() && b->isAtomic())
|
| - return false;
|
| -
|
| - return stringImplContentEqual(a, b);
|
| -}
|
| -
|
| -template <typename CharType>
|
| -inline bool equalInternal(const StringImpl* a,
|
| - const CharType* b,
|
| - unsigned length) {
|
| - if (!a)
|
| - return !b;
|
| - if (!b)
|
| - return false;
|
| -
|
| - if (a->length() != length)
|
| - return false;
|
| - if (a->is8Bit())
|
| - return equal(a->characters8(), b, length);
|
| - return equal(a->characters16(), b, length);
|
| -}
|
| -
|
| -bool equal(const StringImpl* a, const LChar* b, unsigned length) {
|
| - return equalInternal(a, b, length);
|
| -}
|
| -
|
| -bool equal(const StringImpl* a, const UChar* b, unsigned length) {
|
| - return equalInternal(a, b, length);
|
| -}
|
| -
|
| -bool equal(const StringImpl* a, const LChar* b) {
|
| - if (!a)
|
| - return !b;
|
| - if (!b)
|
| - return !a;
|
| -
|
| - unsigned length = a->length();
|
| -
|
| - if (a->is8Bit()) {
|
| - const LChar* aPtr = a->characters8();
|
| - for (unsigned i = 0; i != length; ++i) {
|
| - LChar bc = b[i];
|
| - LChar ac = aPtr[i];
|
| - if (!bc)
|
| - return false;
|
| - if (ac != bc)
|
| - return false;
|
| - }
|
| -
|
| - return !b[length];
|
| - }
|
| -
|
| - const UChar* aPtr = a->characters16();
|
| - for (unsigned i = 0; i != length; ++i) {
|
| - LChar bc = b[i];
|
| - if (!bc)
|
| - return false;
|
| - if (aPtr[i] != bc)
|
| - return false;
|
| - }
|
| -
|
| - return !b[length];
|
| -}
|
| -
|
| -bool equalNonNull(const StringImpl* a, const StringImpl* b) {
|
| - DCHECK(a);
|
| - DCHECK(b);
|
| - if (a == b)
|
| - return true;
|
| -
|
| - return stringImplContentEqual(a, b);
|
| -}
|
| -
|
| -bool equalIgnoringNullity(StringImpl* a, StringImpl* b) {
|
| - if (!a && b && !b->length())
|
| - return true;
|
| - if (!b && a && !a->length())
|
| - return true;
|
| - return equal(a, b);
|
| -}
|
| -
|
| -template <typename CharacterType1, typename CharacterType2>
|
| -int codePointCompareIgnoringASCIICase(unsigned l1,
|
| - unsigned l2,
|
| - const CharacterType1* c1,
|
| - const CharacterType2* c2) {
|
| - const unsigned lmin = l1 < l2 ? l1 : l2;
|
| - unsigned pos = 0;
|
| - while (pos < lmin && toASCIILower(*c1) == toASCIILower(*c2)) {
|
| - ++c1;
|
| - ++c2;
|
| - ++pos;
|
| - }
|
| -
|
| - if (pos < lmin)
|
| - return (toASCIILower(c1[0]) > toASCIILower(c2[0])) ? 1 : -1;
|
| -
|
| - if (l1 == l2)
|
| - return 0;
|
| -
|
| - return (l1 > l2) ? 1 : -1;
|
| -}
|
| -
|
| -int codePointCompareIgnoringASCIICase(const StringImpl* string1,
|
| - const LChar* string2) {
|
| - unsigned length1 = string1 ? string1->length() : 0;
|
| - size_t length2 = string2 ? strlen(reinterpret_cast<const char*>(string2)) : 0;
|
| -
|
| - if (!string1)
|
| - return length2 > 0 ? -1 : 0;
|
| -
|
| - if (!string2)
|
| - return length1 > 0 ? 1 : 0;
|
| -
|
| - if (string1->is8Bit())
|
| - return codePointCompareIgnoringASCIICase(length1, length2,
|
| - string1->characters8(), string2);
|
| - return codePointCompareIgnoringASCIICase(length1, length2,
|
| - string1->characters16(), string2);
|
| -}
|
| -
|
| -UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier) {
|
| - if (!localeIdentifier.isNull()) {
|
| - if (localeIdMatchesLang(localeIdentifier, "tr") ||
|
| - localeIdMatchesLang(localeIdentifier, "az")) {
|
| - if (c == 'i')
|
| - return latinCapitalLetterIWithDotAbove;
|
| - if (c == latinSmallLetterDotlessI)
|
| - return 'I';
|
| - } else if (localeIdMatchesLang(localeIdentifier, "lt")) {
|
| - // TODO(rob.buis) implement upper-casing rules for lt
|
| - // like in StringImpl::upper(locale).
|
| - }
|
| - }
|
| -
|
| - return toUpper(c);
|
| -}
|
| -
|
| -} // namespace WTF
|
|
|