base/string_util.h - Issue 28189: ASCII <-> UTF16 conversion functions. These are just copies of WideToASCII a...

Side by Side Diff: base/string_util.h

Issue 28189: ASCII <-> UTF16 conversion functions. These are just copies of WideToASCII a... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: '' Created 11 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « no previous file | base/string_util.cc » ('j') | no next file with comments »

OLD	NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4 //	4 //

5 // This file defines utility functions for working with strings.	5 // This file defines utility functions for working with strings.

6	6

7 #ifndef BASE_STRING_UTIL_H_	7 #ifndef BASE_STRING_UTIL_H_

8 #define BASE_STRING_UTIL_H_	8 #define BASE_STRING_UTIL_H_

9	9

10 #include <string>	10 #include <string>

(...skipping 138 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
149 // strings that contain them. This is useful when trying to deal with text	149 // strings that contain them. This is useful when trying to deal with text

150 // copied from terminals.	150 // copied from terminals.

151 // Returns \|text\|, with the following three transformations:	151 // Returns \|text\|, with the following three transformations:

152 // (1) Leading and trailing whitespace is trimmed.	152 // (1) Leading and trailing whitespace is trimmed.

153 // (2) If \|trim_sequences_with_line_breaks\| is true, any other whitespace	153 // (2) If \|trim_sequences_with_line_breaks\| is true, any other whitespace

154 // sequences containing a CR or LF are trimmed.	154 // sequences containing a CR or LF are trimmed.

155 // (3) All other whitespace sequences are converted to single spaces.	155 // (3) All other whitespace sequences are converted to single spaces.

156 std::wstring CollapseWhitespace(const std::wstring& text,	156 std::wstring CollapseWhitespace(const std::wstring& text,

157 bool trim_sequences_with_line_breaks);	157 bool trim_sequences_with_line_breaks);

158	158

159 // These convert between ASCII (7-bit) and UTF16 strings.	159 // These convert between ASCII (7-bit) and Wide/UTF16 strings.

160 std::string WideToASCII(const std::wstring& wide);	160 std::string WideToASCII(const std::wstring& wide);

161 std::wstring ASCIIToWide(const std::string& ascii);	161 std::wstring ASCIIToWide(const std::string& ascii);

	162 std::string UTF16ToASCII(const string16& utf16);

	163 string16 ASCIIToUTF16(const std::string& ascii);

162	164

163 // These convert between UTF-8, -16, and -32 strings. They are potentially slow,	165 // These convert between UTF-8, -16, and -32 strings. They are potentially slow,

164 // so avoid unnecessary conversions. The low-level versions return a boolean	166 // so avoid unnecessary conversions. The low-level versions return a boolean

165 // indicating whether the conversion was 100% valid. In this case, it will still	167 // indicating whether the conversion was 100% valid. In this case, it will still

166 // do the best it can and put the result in the output buffer. The versions that	168 // do the best it can and put the result in the output buffer. The versions that

167 // return strings ignore this error and just return the best conversion	169 // return strings ignore this error and just return the best conversion

168 // possible.	170 // possible.

169 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output);	171 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output);

170 std::string WideToUTF8(const std::wstring& wide);	172 std::string WideToUTF8(const std::wstring& wide);

171 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output);	173 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output);

(...skipping 56 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
228	230

229 // Returns true if the specified string matches the criteria. How can a wide	231 // Returns true if the specified string matches the criteria. How can a wide

230 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the	232 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the

231 // first case) or characters that use only 8-bits and whose 8-bit	233 // first case) or characters that use only 8-bits and whose 8-bit

232 // representation looks like a UTF-8 string (the second case).	234 // representation looks like a UTF-8 string (the second case).

233 bool IsString8Bit(const std::wstring& str);	235 bool IsString8Bit(const std::wstring& str);

234 bool IsStringUTF8(const std::string& str);	236 bool IsStringUTF8(const std::string& str);

235 bool IsStringWideUTF8(const std::wstring& str);	237 bool IsStringWideUTF8(const std::wstring& str);

236 bool IsStringASCII(const std::wstring& str);	238 bool IsStringASCII(const std::wstring& str);

237 bool IsStringASCII(const std::string& str);	239 bool IsStringASCII(const std::string& str);

	240 bool IsStringASCII(const string16& str);

238	241

239 // ASCII-specific tolower. The standard library's tolower is locale sensitive,	242 // ASCII-specific tolower. The standard library's tolower is locale sensitive,

240 // so we don't want to use it here.	243 // so we don't want to use it here.

241 template <class Char> inline Char ToLowerASCII(Char c) {	244 template <class Char> inline Char ToLowerASCII(Char c) {

242 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;	245 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;

243 }	246 }

244	247

245 // Converts the elements of the given string. This version uses a pointer to	248 // Converts the elements of the given string. This version uses a pointer to

246 // clearly differentiate it from the non-pointer variant.	249 // clearly differentiate it from the non-pointer variant.

247 template <class str> inline void StringToLowerASCII(str* s) {	250 template <class str> inline void StringToLowerASCII(str* s) {

(...skipping 318 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
566 // Returns a hex string representation of a binary buffer.	569 // Returns a hex string representation of a binary buffer.

567 // The returned hex string will be in upper case.	570 // The returned hex string will be in upper case.

568 // This function does not check if \|size\| is within reasonable limits since	571 // This function does not check if \|size\| is within reasonable limits since

569 // it's written with trusted data in mind.	572 // it's written with trusted data in mind.

570 // If you suspect that the data you want to format might be large,	573 // If you suspect that the data you want to format might be large,

571 // the absolute max size for \|size\| should be	574 // the absolute max size for \|size\| should be

572 // std::numeric_limits<size_t>::max() / 2	575 // std::numeric_limits<size_t>::max() / 2

573 std::string HexEncode(const void* bytes, size_t size);	576 std::string HexEncode(const void* bytes, size_t size);

574	577

575 #endif // BASE_STRING_UTIL_H_	578 #endif // BASE_STRING_UTIL_H_

OLD	NEW