base/string_util.h - Issue 147038: Pass through non-character codepoints in UTF-8,16,32 and Wide conversion func...

Side by Side Diff: base/string_util.h

Issue 147038: Pass through non-character codepoints in UTF-8,16,32 and Wide conversion func... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4 //	4 //

5 // This file defines utility functions for working with strings.	5 // This file defines utility functions for working with strings.

6	6

7 #ifndef BASE_STRING_UTIL_H_	7 #ifndef BASE_STRING_UTIL_H_

8 #define BASE_STRING_UTIL_H_	8 #define BASE_STRING_UTIL_H_

9	9

10 #include <stdarg.h> // va_list	10 #include <stdarg.h> // va_list

(...skipping 168 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
179 std::wstring ASCIIToWide(const StringPiece& ascii);	179 std::wstring ASCIIToWide(const StringPiece& ascii);

180 std::string UTF16ToASCII(const string16& utf16);	180 std::string UTF16ToASCII(const string16& utf16);

181 string16 ASCIIToUTF16(const StringPiece& ascii);	181 string16 ASCIIToUTF16(const StringPiece& ascii);

182	182

183 // These convert between UTF-8, -16, and -32 strings. They are potentially slow,	183 // These convert between UTF-8, -16, and -32 strings. They are potentially slow,

184 // so avoid unnecessary conversions. The low-level versions return a boolean	184 // so avoid unnecessary conversions. The low-level versions return a boolean

185 // indicating whether the conversion was 100% valid. In this case, it will still	185 // indicating whether the conversion was 100% valid. In this case, it will still

186 // do the best it can and put the result in the output buffer. The versions that	186 // do the best it can and put the result in the output buffer. The versions that

187 // return strings ignore this error and just return the best conversion	187 // return strings ignore this error and just return the best conversion

188 // possible.	188 // possible.

	189 //

	190 // Note that only the structural validity is checked and non-character

	191 // codepoints and unassigned ones are regarded as valid.

	192 // TODO(jungshik): Consider replacing an invalid input sequence with

	193 // the Unicode replacement character or adding \|replacement_char\| parameter.

	194 // Currently, it's skipped in the output, which could be problematic in

	195 // some situations.

189 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output);	196 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output);

190 std::string WideToUTF8(const std::wstring& wide);	197 std::string WideToUTF8(const std::wstring& wide);

191 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output);	198 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output);

192 std::wstring UTF8ToWide(const StringPiece& utf8);	199 std::wstring UTF8ToWide(const StringPiece& utf8);

193	200

194 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output);	201 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output);

195 string16 WideToUTF16(const std::wstring& wide);	202 string16 WideToUTF16(const std::wstring& wide);

196 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output);	203 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output);

197 std::wstring UTF16ToWide(const string16& utf16);	204 std::wstring UTF16ToWide(const string16& utf16);

198	205

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
243 std::wstring* wide);	250 std::wstring* wide);

244	251

245 // Converts the given wide string to the corresponding Latin1. This will fail	252 // Converts the given wide string to the corresponding Latin1. This will fail

246 // (return false) if any characters are more than 255.	253 // (return false) if any characters are more than 255.

247 bool WideToLatin1(const std::wstring& wide, std::string* latin1);	254 bool WideToLatin1(const std::wstring& wide, std::string* latin1);

248	255

249 // Returns true if the specified string matches the criteria. How can a wide	256 // Returns true if the specified string matches the criteria. How can a wide

250 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the	257 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the

251 // first case) or characters that use only 8-bits and whose 8-bit	258 // first case) or characters that use only 8-bits and whose 8-bit

252 // representation looks like a UTF-8 string (the second case).	259 // representation looks like a UTF-8 string (the second case).

	260 //

	261 // Note that IsStringUTF8 checks not only if the input is structurally

	262 // valid but also if it doesn't contain any non-character codepoint

	263 // (e.g. U+FFFE). It's done on purpose because all the existing callers want

	264 // to have the maximum 'discriminating' power from other encodings. If

	265 // there's a use case for just checking the structural validity, we have to

	266 // add a new function for that.

253 bool IsString8Bit(const std::wstring& str);	267 bool IsString8Bit(const std::wstring& str);

254 bool IsStringUTF8(const std::string& str);	268 bool IsStringUTF8(const std::string& str);

255 bool IsStringWideUTF8(const std::wstring& str);	269 bool IsStringWideUTF8(const std::wstring& str);

256 bool IsStringASCII(const std::wstring& str);	270 bool IsStringASCII(const std::wstring& str);

257 bool IsStringASCII(const StringPiece& str);	271 bool IsStringASCII(const StringPiece& str);

258 bool IsStringASCII(const string16& str);	272 bool IsStringASCII(const string16& str);

259	273

260 // ASCII-specific tolower. The standard library's tolower is locale sensitive,	274 // ASCII-specific tolower. The standard library's tolower is locale sensitive,

261 // so we don't want to use it here.	275 // so we don't want to use it here.

262 template <class Char> inline Char ToLowerASCII(Char c) {	276 template <class Char> inline Char ToLowerASCII(Char c) {

(...skipping 358 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
621 #elif defined(WCHAR_T_IS_UTF32)	635 #elif defined(WCHAR_T_IS_UTF32)

622 typedef uint32 Unsigned;	636 typedef uint32 Unsigned;

623 #endif	637 #endif

624 };	638 };

625 template<>	639 template<>

626 struct ToUnsigned<short> {	640 struct ToUnsigned<short> {

627 typedef unsigned short Unsigned;	641 typedef unsigned short Unsigned;

628 };	642 };

629	643

630 #endif // BASE_STRING_UTIL_H_	644 #endif // BASE_STRING_UTIL_H_

OLD	NEW

« no previous file with comments | « base/file_util_unittest.cc ('k') | base/string_util_icu.cc » ('j') | no next file with comments »