| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // This file defines utility functions for working with strings. | 5 // This file defines utility functions for working with strings. |
| 6 | 6 |
| 7 #ifndef BASE_STRING_UTIL_H_ | 7 #ifndef BASE_STRING_UTIL_H_ |
| 8 #define BASE_STRING_UTIL_H_ | 8 #define BASE_STRING_UTIL_H_ |
| 9 | 9 |
| 10 #include <stdarg.h> // va_list | 10 #include <stdarg.h> // va_list |
| (...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 169 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace | 169 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace |
| 170 // sequences containing a CR or LF are trimmed. | 170 // sequences containing a CR or LF are trimmed. |
| 171 // (3) All other whitespace sequences are converted to single spaces. | 171 // (3) All other whitespace sequences are converted to single spaces. |
| 172 std::wstring CollapseWhitespace(const std::wstring& text, | 172 std::wstring CollapseWhitespace(const std::wstring& text, |
| 173 bool trim_sequences_with_line_breaks); | 173 bool trim_sequences_with_line_breaks); |
| 174 std::string CollapseWhitespaceASCII(const std::string& text, | 174 std::string CollapseWhitespaceASCII(const std::string& text, |
| 175 bool trim_sequences_with_line_breaks); | 175 bool trim_sequences_with_line_breaks); |
| 176 | 176 |
| 177 // These convert between ASCII (7-bit) and Wide/UTF16 strings. | 177 // These convert between ASCII (7-bit) and Wide/UTF16 strings. |
| 178 std::string WideToASCII(const std::wstring& wide); | 178 std::string WideToASCII(const std::wstring& wide); |
| 179 std::wstring ASCIIToWide(const StringPiece& ascii); | 179 std::wstring ASCIIToWide(const base::StringPiece& ascii); |
| 180 std::string UTF16ToASCII(const string16& utf16); | 180 std::string UTF16ToASCII(const string16& utf16); |
| 181 string16 ASCIIToUTF16(const StringPiece& ascii); | 181 string16 ASCIIToUTF16(const base::StringPiece& ascii); |
| 182 | 182 |
| 183 // These convert between UTF-8, -16, and -32 strings. They are potentially slow, | 183 // These convert between UTF-8, -16, and -32 strings. They are potentially slow, |
| 184 // so avoid unnecessary conversions. The low-level versions return a boolean | 184 // so avoid unnecessary conversions. The low-level versions return a boolean |
| 185 // indicating whether the conversion was 100% valid. In this case, it will still | 185 // indicating whether the conversion was 100% valid. In this case, it will still |
| 186 // do the best it can and put the result in the output buffer. The versions that | 186 // do the best it can and put the result in the output buffer. The versions that |
| 187 // return strings ignore this error and just return the best conversion | 187 // return strings ignore this error and just return the best conversion |
| 188 // possible. | 188 // possible. |
| 189 // | 189 // |
| 190 // Note that only the structural validity is checked and non-character | 190 // Note that only the structural validity is checked and non-character |
| 191 // codepoints and unassigned codepoints are regarded as valid. | 191 // codepoints and unassigned codepoints are regarded as valid. |
| 192 // TODO(jungshik): Consider replacing an invalid input sequence with | 192 // TODO(jungshik): Consider replacing an invalid input sequence with |
| 193 // the Unicode replacement character or adding |replacement_char| parameter. | 193 // the Unicode replacement character or adding |replacement_char| parameter. |
| 194 // Currently, it's skipped in the output, which could be problematic in | 194 // Currently, it's skipped in the output, which could be problematic in |
| 195 // some situations. | 195 // some situations. |
| 196 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output); | 196 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output); |
| 197 std::string WideToUTF8(const std::wstring& wide); | 197 std::string WideToUTF8(const std::wstring& wide); |
| 198 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output); | 198 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output); |
| 199 std::wstring UTF8ToWide(const StringPiece& utf8); | 199 std::wstring UTF8ToWide(const base::StringPiece& utf8); |
| 200 | 200 |
| 201 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output); | 201 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output); |
| 202 string16 WideToUTF16(const std::wstring& wide); | 202 string16 WideToUTF16(const std::wstring& wide); |
| 203 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output); | 203 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output); |
| 204 std::wstring UTF16ToWide(const string16& utf16); | 204 std::wstring UTF16ToWide(const string16& utf16); |
| 205 | 205 |
| 206 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output); | 206 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output); |
| 207 string16 UTF8ToUTF16(const std::string& utf8); | 207 string16 UTF8ToUTF16(const std::string& utf8); |
| 208 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output); | 208 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output); |
| 209 std::string UTF16ToUTF8(const string16& utf16); | 209 std::string UTF16ToUTF8(const string16& utf16); |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 280 // Note that IsStringUTF8 checks not only if the input is structurally | 280 // Note that IsStringUTF8 checks not only if the input is structurally |
| 281 // valid but also if it doesn't contain any non-character codepoint | 281 // valid but also if it doesn't contain any non-character codepoint |
| 282 // (e.g. U+FFFE). It's done on purpose because all the existing callers want | 282 // (e.g. U+FFFE). It's done on purpose because all the existing callers want |
| 283 // to have the maximum 'discriminating' power from other encodings. If | 283 // to have the maximum 'discriminating' power from other encodings. If |
| 284 // there's a use case for just checking the structural validity, we have to | 284 // there's a use case for just checking the structural validity, we have to |
| 285 // add a new function for that. | 285 // add a new function for that. |
| 286 bool IsString8Bit(const std::wstring& str); | 286 bool IsString8Bit(const std::wstring& str); |
| 287 bool IsStringUTF8(const std::string& str); | 287 bool IsStringUTF8(const std::string& str); |
| 288 bool IsStringWideUTF8(const std::wstring& str); | 288 bool IsStringWideUTF8(const std::wstring& str); |
| 289 bool IsStringASCII(const std::wstring& str); | 289 bool IsStringASCII(const std::wstring& str); |
| 290 bool IsStringASCII(const StringPiece& str); | 290 bool IsStringASCII(const base::StringPiece& str); |
| 291 bool IsStringASCII(const string16& str); | 291 bool IsStringASCII(const string16& str); |
| 292 | 292 |
| 293 // ASCII-specific tolower. The standard library's tolower is locale sensitive, | 293 // ASCII-specific tolower. The standard library's tolower is locale sensitive, |
| 294 // so we don't want to use it here. | 294 // so we don't want to use it here. |
| 295 template <class Char> inline Char ToLowerASCII(Char c) { | 295 template <class Char> inline Char ToLowerASCII(Char c) { |
| 296 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; | 296 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; |
| 297 } | 297 } |
| 298 | 298 |
| 299 // Converts the elements of the given string. This version uses a pointer to | 299 // Converts the elements of the given string. This version uses a pointer to |
| 300 // clearly differentiate it from the non-pointer variant. | 300 // clearly differentiate it from the non-pointer variant. |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 346 const char* b); | 346 const char* b); |
| 347 bool LowerCaseEqualsASCII(const char* a_begin, | 347 bool LowerCaseEqualsASCII(const char* a_begin, |
| 348 const char* a_end, | 348 const char* a_end, |
| 349 const char* b); | 349 const char* b); |
| 350 bool LowerCaseEqualsASCII(const wchar_t* a_begin, | 350 bool LowerCaseEqualsASCII(const wchar_t* a_begin, |
| 351 const wchar_t* a_end, | 351 const wchar_t* a_end, |
| 352 const char* b); | 352 const char* b); |
| 353 | 353 |
| 354 // Performs a case-sensitive string compare. The behavior is undefined if both | 354 // Performs a case-sensitive string compare. The behavior is undefined if both |
| 355 // strings are not ASCII. | 355 // strings are not ASCII. |
| 356 bool EqualsASCII(const string16& a, const StringPiece& b); | 356 bool EqualsASCII(const string16& a, const base::StringPiece& b); |
| 357 | 357 |
| 358 // Returns true if str starts with search, or false otherwise. | 358 // Returns true if str starts with search, or false otherwise. |
| 359 bool StartsWithASCII(const std::string& str, | 359 bool StartsWithASCII(const std::string& str, |
| 360 const std::string& search, | 360 const std::string& search, |
| 361 bool case_sensitive); | 361 bool case_sensitive); |
| 362 bool StartsWith(const std::wstring& str, | 362 bool StartsWith(const std::wstring& str, |
| 363 const std::wstring& search, | 363 const std::wstring& search, |
| 364 bool case_sensitive); | 364 bool case_sensitive); |
| 365 | 365 |
| 366 // Returns true if str ends with search, or false otherwise. | 366 // Returns true if str ends with search, or false otherwise. |
| (...skipping 293 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 660 #elif defined(WCHAR_T_IS_UTF32) | 660 #elif defined(WCHAR_T_IS_UTF32) |
| 661 typedef uint32 Unsigned; | 661 typedef uint32 Unsigned; |
| 662 #endif | 662 #endif |
| 663 }; | 663 }; |
| 664 template<> | 664 template<> |
| 665 struct ToUnsigned<short> { | 665 struct ToUnsigned<short> { |
| 666 typedef unsigned short Unsigned; | 666 typedef unsigned short Unsigned; |
| 667 }; | 667 }; |
| 668 | 668 |
| 669 #endif // BASE_STRING_UTIL_H_ | 669 #endif // BASE_STRING_UTIL_H_ |
| OLD | NEW |