Source/wtf/text/WTFString.cpp - Issue 124003003: Add ascii() / latin1() / utf8() methods to AtomicString to avoid having to call string()

Side by Side Diff: Source/wtf/text/WTFString.cpp

Issue 124003003: Add ascii() / latin1() / utf8() methods to AtomicString to avoid having to call string() (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « Source/wtf/text/WTFString.h ('k') | no next file » | no next file with comments »

OLD	NEW
1 /*	1 /*

2 * (C) 1999 Lars Knoll (knoll@kde.org)	2 * (C) 1999 Lars Knoll (knoll@kde.org)

3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights reserved.	3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights reserved.

4 * Copyright (C) 2007-2009 Torch Mobile, Inc.	4 * Copyright (C) 2007-2009 Torch Mobile, Inc.

5 *	5 *

6 * This library is free software; you can redistribute it and/or	6 * This library is free software; you can redistribute it and/or

7 * modify it under the terms of the GNU Library General Public	7 * modify it under the terms of the GNU Library General Public

8 * License as published by the Free Software Foundation; either	8 * License as published by the Free Software Foundation; either

9 * version 2 of the License, or (at your option) any later version.	9 * version 2 of the License, or (at your option) any later version.

10 *	10 *

(...skipping 774 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
785	785

786 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.	786 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.

787 static inline void putUTF8Triple(char*& buffer, UChar ch)	787 static inline void putUTF8Triple(char*& buffer, UChar ch)

788 {	788 {

789 ASSERT(ch >= 0x0800);	789 ASSERT(ch >= 0x0800);

790 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);	790 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);

791 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);	791 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);

792 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);	792 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);

793 }	793 }

794	794

795 CString String::utf8(ConversionMode mode) const	795 CString String::utf8(UTF8ConversionMode mode) const

796 {	796 {

797 unsigned length = this->length();	797 unsigned length = this->length();

798	798

799 if (!length)	799 if (!length)

800 return CString("", 0);	800 return CString("", 0);

801	801

802 // Allocate a buffer big enough to hold all the characters	802 // Allocate a buffer big enough to hold all the characters

803 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).	803 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).

804 // Optimization ideas, if we find this function is hot:	804 // Optimization ideas, if we find this function is hot:

805 // * We could speculatively create a CStringBuffer to contain 'length'	805 // * We could speculatively create a CStringBuffer to contain 'length'

(...skipping 10 matching lines...) Expand all Loading...
816 char* buffer = bufferVector.data();	816 char* buffer = bufferVector.data();

817	817

818 if (is8Bit()) {	818 if (is8Bit()) {

819 const LChar* characters = this->characters8();	819 const LChar* characters = this->characters8();

820	820

821 ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());	821 ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());

822 ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion	822 ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion

823 } else {	823 } else {

824 const UChar* characters = this->characters16();	824 const UChar* characters = this->characters16();

825	825

826 if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) {	826 if (mode == StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD) {

827 const UChar* charactersEnd = characters + length;	827 const UChar* charactersEnd = characters + length;

828 char* bufferEnd = buffer + bufferVector.size();	828 char* bufferEnd = buffer + bufferVector.size();

829 while (characters < charactersEnd) {	829 while (characters < charactersEnd) {

830 // Use strict conversion to detect unpaired surrogates.	830 // Use strict conversion to detect unpaired surrogates.

831 ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, &buffer, bufferEnd, true);	831 ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, &buffer, bufferEnd, true);

832 ASSERT(result != targetExhausted);	832 ASSERT(result != targetExhausted);

833 // Conversion fails when there is an unpaired surrogate.	833 // Conversion fails when there is an unpaired surrogate.

834 // Put replacement character (U+FFFD) instead of the unpaired surrogate.	834 // Put replacement character (U+FFFD) instead of the unpaired surrogate.

835 if (result != conversionOK) {	835 if (result != conversionOK) {

836 ASSERT((0xD800 <= *characters && *characters <= 0xDFFF));	836 ASSERT((0xD800 <= *characters && *characters <= 0xDFFF));

837 // There should be room left, since one UChar hasn't been converted.	837 // There should be room left, since one UChar hasn't been converted.

838 ASSERT((buffer + 3) <= bufferEnd);	838 ASSERT((buffer + 3) <= bufferEnd);

839 putUTF8Triple(buffer, replacementCharacter);	839 putUTF8Triple(buffer, replacementCharacter);

840 ++characters;	840 ++characters;

841 }	841 }

842 }	842 }

843 } else {	843 } else {

844 bool strict = mode == StrictConversion;	844 bool strict = mode == StrictUTF8Conversion;

845 ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);	845 ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);

846 ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion	846 ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion

847	847

848 // Only produced from strict conversion.	848 // Only produced from strict conversion.

849 if (result == sourceIllegal) {	849 if (result == sourceIllegal) {

850 ASSERT(strict);	850 ASSERT(strict);

851 return CString();	851 return CString();

852 }	852 }

853	853

854 // Check for an unconverted high surrogate.	854 // Check for an unconverted high surrogate.

(...skipping 411 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1266 buffer.append('\0');	1266 buffer.append('\0');

1267 return buffer;	1267 return buffer;

1268 }	1268 }

1269	1269

1270 Vector<char> asciiDebug(String& string)	1270 Vector<char> asciiDebug(String& string)

1271 {	1271 {

1272 return asciiDebug(string.impl());	1272 return asciiDebug(string.impl());

1273 }	1273 }

1274	1274

1275 #endif	1275 #endif

OLD	NEW