third_party/WebKit/Source/wtf/text/StringImpl.cpp - Issue 2488763002: Use ICU's case mapping API for Greek uppercasing

Side by Side Diff: third_party/WebKit/Source/wtf/text/StringImpl.cpp

Issue 2488763002: Use ICU's case mapping API for Greek uppercasing (Closed)

Patch Set: Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)	2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)

3 * (C) 1999 Antti Koivisto (koivisto@kde.org)	3 * (C) 1999 Antti Koivisto (koivisto@kde.org)

4 * (C) 2001 Dirk Mueller ( mueller@kde.org )	4 * (C) 2001 Dirk Mueller ( mueller@kde.org )

5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All	5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All

6 * rights reserved.	6 * rights reserved.

7 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)	7 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)

8 *	8 *

9 * This library is free software; you can redistribute it and/or	9 * This library is free software; you can redistribute it and/or

10 * modify it under the terms of the GNU Library General Public	10 * modify it under the terms of the GNU Library General Public

(...skipping 21 matching lines...) Expand all Loading...
32 #include "wtf/allocator/PartitionAlloc.h"	32 #include "wtf/allocator/PartitionAlloc.h"

33 #include "wtf/allocator/Partitions.h"	33 #include "wtf/allocator/Partitions.h"

34 #include "wtf/text/AtomicString.h"	34 #include "wtf/text/AtomicString.h"

35 #include "wtf/text/AtomicStringTable.h"	35 #include "wtf/text/AtomicStringTable.h"

36 #include "wtf/text/CharacterNames.h"	36 #include "wtf/text/CharacterNames.h"

37 #include "wtf/text/StringBuffer.h"	37 #include "wtf/text/StringBuffer.h"

38 #include "wtf/text/StringHash.h"	38 #include "wtf/text/StringHash.h"

39 #include "wtf/text/StringToNumber.h"	39 #include "wtf/text/StringToNumber.h"

40 #include <algorithm>	40 #include <algorithm>

41 #include <memory>	41 #include <memory>

42 #include <unicode/translit.h>

43 #include <unicode/unistr.h>

44	42

45 #ifdef STRING_STATS	43 #ifdef STRING_STATS

46 #include "wtf/DataLog.h"	44 #include "wtf/DataLog.h"

47 #include "wtf/HashMap.h"	45 #include "wtf/HashMap.h"

48 #include "wtf/HashSet.h"	46 #include "wtf/HashSet.h"

49 #include "wtf/RefCounted.h"	47 #include "wtf/RefCounted.h"

50 #include "wtf/ThreadingPrimitives.h"	48 #include "wtf/ThreadingPrimitives.h"

51 #include <unistd.h>	49 #include <unistd.h>

52 #endif	50 #endif

53	51

(...skipping 742 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
796 return output.release();	794 return output.release();

797 }	795 }

798 if (status != U_BUFFER_OVERFLOW_ERROR)	796 if (status != U_BUFFER_OVERFLOW_ERROR)

799 return originalString;	797 return originalString;

800 // Expand the buffer.	798 // Expand the buffer.

801 output = StringImpl::createUninitialized(targetLength, data16);	799 output = StringImpl::createUninitialized(targetLength, data16);

802 } while (true);	800 } while (true);

803 }	801 }

804	802

805 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) {	803 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) {

806 // Use the more-optimized code path most of the time.	804 // Use the more optimized code path most of the time.

807 // Only Turkic (tr and az) languages and Lithuanian require	805 // Only Turkic (tr and az) languages and Lithuanian require

808 // locale-specific lowercasing rules. Even though CLDR has el-Lower,	806 // locale-specific lowercasing rules. Even though CLDR has el-Lower,

809 // it's identical to the locale-agnostic lowercasing. Context-dependent	807 // it's identical to the locale-agnostic lowercasing. Context-dependent

810 // handling of Greek capital sigma is built into the common lowercasing	808 // handling of Greek capital sigma is built into the common lowercasing

811 // function in ICU.	809 // function in ICU.

812 const char* localeForConversion = 0;	810 const char* localeForConversion = 0;

813 if (localeIdMatchesLang(localeIdentifier, "tr") ||	811 if (localeIdMatchesLang(localeIdentifier, "tr") ||

814 localeIdMatchesLang(localeIdentifier, "az"))	812 localeIdMatchesLang(localeIdentifier, "az"))

815 localeForConversion = "tr";	813 localeForConversion = "tr";

816 else if (localeIdMatchesLang(localeIdentifier, "lt"))	814 else if (localeIdMatchesLang(localeIdentifier, "lt"))

817 localeForConversion = "lt";	815 localeForConversion = "lt";

818 else	816 else

819 return lower();	817 return lower();

820	818

821 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))	819 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))

822 CRASH();	820 CRASH();

823 int length = m_length;	821 int length = m_length;

824	822

825 RefPtr<StringImpl> upconverted = upconvertedString();	823 RefPtr<StringImpl> upconverted = upconvertedString();

826 const UChar* source16 = upconverted->characters16();	824 const UChar* source16 = upconverted->characters16();

827 return caseConvert(source16, length, u_strToLower, localeForConversion, this);	825 return caseConvert(source16, length, u_strToLower, localeForConversion, this);

828 }	826 }

829	827

830 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) {	828 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) {

831 // Use the more-optimized code path most of the time.	829 // Use the more-optimized code path most of the time.

832 // Only Turkic (tr and az) languages and Greek require locale-specific	830 // Only Turkic (tr and az) languages, Greek and Lithuanian require

833 // lowercasing rules.	831 // locale-specific uppercasing rules.

834 icu::UnicodeString transliteratorId;

835 const char* localeForConversion = 0;	832 const char* localeForConversion = 0;

836 if (localeIdMatchesLang(localeIdentifier, "tr") ||	833 if (localeIdMatchesLang(localeIdentifier, "tr") ||

837 localeIdMatchesLang(localeIdentifier, "az"))	834 localeIdMatchesLang(localeIdentifier, "az"))

838 localeForConversion = "tr";	835 localeForConversion = "tr";

839 else if (localeIdMatchesLang(localeIdentifier, "el"))	836 else if (localeIdMatchesLang(localeIdentifier, "el"))

840 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper");	837 localeForConversion = "el";

841 else if (localeIdMatchesLang(localeIdentifier, "lt"))	838 else if (localeIdMatchesLang(localeIdentifier, "lt"))

842 localeForConversion = "lt";	839 localeForConversion = "lt";

843 else	840 else

844 return upper();	841 return upper();

845	842

846 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))	843 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))

847 CRASH();	844 CRASH();

848 int length = m_length;	845 int length = m_length;

849	846

850 RefPtr<StringImpl> upconverted = upconvertedString();	847 RefPtr<StringImpl> upconverted = upconvertedString();

851 const UChar* source16 = upconverted->characters16();	848 const UChar* source16 = upconverted->characters16();

852	849

853 if (localeForConversion)	850 return caseConvert(source16, length, u_strToUpper, localeForConversion, this);

854 return caseConvert(source16, length, u_strToUpper, localeForConversion,

855 this);

856

857 // TODO(jungshik): Cache transliterator if perf penalty warrants it for Greek.

858 UErrorCode status = U_ZERO_ERROR;

859 std::unique_ptr<icu::Transliterator> translit =

860 wrapUnique(icu::Transliterator::createInstance(transliteratorId,

861 UTRANS_FORWARD, status));

862 if (U_FAILURE(status))

863 return upper();

864

865 // target will be copy-on-write.

866 icu::UnicodeString target(false, source16, length);

867 translit->transliterate(target);

868

869 return create(target.getBuffer(), target.length());

870 }	851 }

871	852

872 PassRefPtr<StringImpl> StringImpl::fill(UChar character) {	853 PassRefPtr<StringImpl> StringImpl::fill(UChar character) {

873 if (!(character & ~0x7F)) {	854 if (!(character & ~0x7F)) {

874 LChar* data;	855 LChar* data;

875 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);	856 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

876 for (unsigned i = 0; i < m_length; ++i)	857 for (unsigned i = 0; i < m_length; ++i)

877 data[i] = static_cast<LChar>(character);	858 data[i] = static_cast<LChar>(character);

878 return newImpl.release();	859 return newImpl.release();

879 }	860 }

(...skipping 1305 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2185 } else if (localeIdMatchesLang(localeIdentifier, "lt")) {	2166 } else if (localeIdMatchesLang(localeIdentifier, "lt")) {

2186 // TODO(rob.buis) implement upper-casing rules for lt	2167 // TODO(rob.buis) implement upper-casing rules for lt

2187 // like in StringImpl::upper(locale).	2168 // like in StringImpl::upper(locale).

2188 }	2169 }

2189 }	2170 }

2190	2171

2191 return toUpper(c);	2172 return toUpper(c);

2192 }	2173 }

2193	2174

2194 } // namespace WTF	2175 } // namespace WTF

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »