| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // This file defines utility functions for working with strings. | 5 // This file defines utility functions for working with strings. |
| 6 | 6 |
| 7 #ifndef BASE_STRING_UTIL_H_ | 7 #ifndef BASE_STRING_UTIL_H_ |
| 8 #define BASE_STRING_UTIL_H_ | 8 #define BASE_STRING_UTIL_H_ |
| 9 | 9 |
| 10 #include <stdarg.h> // va_list | 10 #include <stdarg.h> // va_list |
| (...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 179 std::wstring ASCIIToWide(const StringPiece& ascii); | 179 std::wstring ASCIIToWide(const StringPiece& ascii); |
| 180 std::string UTF16ToASCII(const string16& utf16); | 180 std::string UTF16ToASCII(const string16& utf16); |
| 181 string16 ASCIIToUTF16(const StringPiece& ascii); | 181 string16 ASCIIToUTF16(const StringPiece& ascii); |
| 182 | 182 |
| 183 // These convert between UTF-8, -16, and -32 strings. They are potentially slow, | 183 // These convert between UTF-8, -16, and -32 strings. They are potentially slow, |
| 184 // so avoid unnecessary conversions. The low-level versions return a boolean | 184 // so avoid unnecessary conversions. The low-level versions return a boolean |
| 185 // indicating whether the conversion was 100% valid. In this case, it will still | 185 // indicating whether the conversion was 100% valid. In this case, it will still |
| 186 // do the best it can and put the result in the output buffer. The versions that | 186 // do the best it can and put the result in the output buffer. The versions that |
| 187 // return strings ignore this error and just return the best conversion | 187 // return strings ignore this error and just return the best conversion |
| 188 // possible. | 188 // possible. |
| 189 // |
| 190 // Note that only the structural validity is checked; non-character |
| 191 // codepoints and unassigned codepoints are regarded as valid. |
| 192 // TODO(jungshik): Consider replacing an invalid input sequence with |
| 193 // the Unicode replacement character or adding |replacement_char| parameter. |
| 194 // Currently, it's skipped in the output, which could be problematic in |
| 195 // some situations. |
| 189 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output); | 196 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output); |
| 190 std::string WideToUTF8(const std::wstring& wide); | 197 std::string WideToUTF8(const std::wstring& wide); |
| 191 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output); | 198 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output); |
| 192 std::wstring UTF8ToWide(const StringPiece& utf8); | 199 std::wstring UTF8ToWide(const StringPiece& utf8); |
| 193 | 200 |
| 194 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output); | 201 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output); |
| 195 string16 WideToUTF16(const std::wstring& wide); | 202 string16 WideToUTF16(const std::wstring& wide); |
| 196 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output); | 203 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output); |
| 197 std::wstring UTF16ToWide(const string16& utf16); | 204 std::wstring UTF16ToWide(const string16& utf16); |
| 198 | 205 |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 243 std::wstring* wide); | 250 std::wstring* wide); |
| 244 | 251 |
| 245 // Converts the given wide string to the corresponding Latin1. This will fail | 252 // Converts the given wide string to the corresponding Latin1. This will fail |
| 246 // (return false) if any characters are more than 255. | 253 // (return false) if any characters are more than 255. |
| 247 bool WideToLatin1(const std::wstring& wide, std::string* latin1); | 254 bool WideToLatin1(const std::wstring& wide, std::string* latin1); |
| 248 | 255 |
| 249 // Returns true if the specified string matches the criteria. How can a wide | 256 // Returns true if the specified string matches the criteria. How can a wide |
| 250 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the | 257 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the |
| 251 // first case) or characters that use only 8-bits and whose 8-bit | 258 // first case) or characters that use only 8-bits and whose 8-bit |
| 252 // representation looks like a UTF-8 string (the second case). | 259 // representation looks like a UTF-8 string (the second case). |
| 260 // |
| 261 // Note that IsStringUTF8 checks not only if the input is structurally |
| 262 // valid but also if it doesn't contain any non-character codepoint |
| 263 // (e.g. U+FFFE). It's done on purpose because all the existing callers want |
| 264 // to have the maximum 'discriminating' power from other encodings. If |
| 265 // there's a use case for just checking the structural validity, we have to |
| 266 // add a new function for that. |
| 253 bool IsString8Bit(const std::wstring& str); | 267 bool IsString8Bit(const std::wstring& str); |
| 254 bool IsStringUTF8(const std::string& str); | 268 bool IsStringUTF8(const std::string& str); |
| 255 bool IsStringWideUTF8(const std::wstring& str); | 269 bool IsStringWideUTF8(const std::wstring& str); |
| 256 bool IsStringASCII(const std::wstring& str); | 270 bool IsStringASCII(const std::wstring& str); |
| 257 bool IsStringASCII(const StringPiece& str); | 271 bool IsStringASCII(const StringPiece& str); |
| 258 bool IsStringASCII(const string16& str); | 272 bool IsStringASCII(const string16& str); |
| 259 | 273 |
| 260 // ASCII-specific tolower. The standard library's tolower is locale sensitive, | 274 // ASCII-specific tolower. The standard library's tolower is locale sensitive, |
| 261 // so we don't want to use it here. | 275 // so we don't want to use it here. |
| 262 template <class Char> inline Char ToLowerASCII(Char c) { | 276 template <class Char> inline Char ToLowerASCII(Char c) { |
| (...skipping 358 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 621 #elif defined(WCHAR_T_IS_UTF32) | 635 #elif defined(WCHAR_T_IS_UTF32) |
| 622 typedef uint32 Unsigned; | 636 typedef uint32 Unsigned; |
| 623 #endif | 637 #endif |
| 624 }; | 638 }; |
| 625 template<> | 639 template<> |
| 626 struct ToUnsigned<short> { | 640 struct ToUnsigned<short> { |
| 627 typedef unsigned short Unsigned; | 641 typedef unsigned short Unsigned; |
| 628 }; | 642 }; |
| 629 | 643 |
| 630 #endif // BASE_STRING_UTIL_H_ | 644 #endif // BASE_STRING_UTIL_H_ |
| OLD | NEW |