OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // This file defines utility functions for working with strings. | 5 // This file defines utility functions for working with strings. |
6 | 6 |
7 #ifndef BASE_STRINGS_STRING_UTIL_H_ | 7 #ifndef BASE_STRINGS_STRING_UTIL_H_ |
8 #define BASE_STRINGS_STRING_UTIL_H_ | 8 #define BASE_STRINGS_STRING_UTIL_H_ |
9 | 9 |
10 #include <ctype.h> | 10 #include <ctype.h> |
(...skipping 29 matching lines...) Expand all Loading... |
40 | 40 |
41 // Same as strncmp but for char16 strings. | 41 // Same as strncmp but for char16 strings. |
42 int strncmp16(const char16* s1, const char16* s2, size_t count); | 42 int strncmp16(const char16* s1, const char16* s2, size_t count); |
43 | 43 |
44 // Wrapper for vsnprintf that always null-terminates and always returns the | 44 // Wrapper for vsnprintf that always null-terminates and always returns the |
45 // number of characters that would be in an untruncated formatted | 45 // number of characters that would be in an untruncated formatted |
46 // string, even when truncation occurs. | 46 // string, even when truncation occurs. |
47 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) | 47 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) |
48 PRINTF_FORMAT(3, 0); | 48 PRINTF_FORMAT(3, 0); |
49 | 49 |
50 // vswprintf always null-terminates, but when truncation occurs, it will either | |
51 // return -1 or the number of characters that would be in an untruncated | |
52 // formatted string. The actual return value depends on the underlying | |
53 // C library's vswprintf implementation. | |
54 int vswprintf(wchar_t* buffer, size_t size, | |
55 const wchar_t* format, va_list arguments) | |
56 WPRINTF_FORMAT(3, 0); | |
57 | |
58 // Some of these implementations need to be inlined. | 50 // Some of these implementations need to be inlined. |
59 | 51 |
60 // Variadic wrapper that forwards its arguments to vsnprintf via a va_list. | 52 // Variadic wrapper that forwards its arguments to vsnprintf via a va_list. |
61 // Declared separately from its definition just so the PRINTF_FORMAT works. | 53 // Declared separately from its definition just so the PRINTF_FORMAT works. |
62 inline int snprintf(char* buffer, size_t size, const char* format, ...) | 54 inline int snprintf(char* buffer, size_t size, const char* format, ...) |
63 PRINTF_FORMAT(3, 4); | 55 PRINTF_FORMAT(3, 4); |
64 inline int snprintf(char* buffer, size_t size, const char* format, ...) { | 56 inline int snprintf(char* buffer, size_t size, const char* format, ...) { |
65 va_list arguments; | 57 va_list arguments; |
66 va_start(arguments, format); | 58 va_start(arguments, format); |
67 int result = vsnprintf(buffer, size, format, arguments); | 59 int result = vsnprintf(buffer, size, format, arguments); |
68 va_end(arguments); | 60 va_end(arguments); |
69 return result; | 61 return result; |
70 } | 62 } |
71 | 63 |
72 // Variadic wrapper that forwards its arguments to vswprintf via a va_list. | |
73 // Declared separately from its definition just so the WPRINTF_FORMAT works. | |
74 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) | |
75 WPRINTF_FORMAT(3, 4); | |
76 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) { | |
77 va_list arguments; | |
78 va_start(arguments, format); | |
79 int result = vswprintf(buffer, size, format, arguments); | |
80 va_end(arguments); | |
81 return result; | |
82 } | |
83 | |
84 // BSD-style safe and consistent string copy functions. | 64 // BSD-style safe and consistent string copy functions. |
85 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. | 65 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. |
86 // Copies at most |dst_size|-1 characters, and always null-terminates |dst|, as | 66 // Copies at most |dst_size|-1 characters, and always null-terminates |dst|, as |
87 // long as |dst_size| is not 0. Returns the length of |src| in characters. | 67 // long as |dst_size| is not 0. Returns the length of |src| in characters. |
88 // If the return value is >= dst_size, then the output was truncated. | 68 // If the return value is >= dst_size, then the output was truncated. |
89 // NOTE: All sizes are in number of characters, NOT in bytes. | 69 // NOTE: All sizes are in number of characters, NOT in bytes. |
90 BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); | 70 BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); |
91 BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); | 71 BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); |
92 | 72 |
93 // Scan a wprintf format string to determine whether it's portable across a | 73 // Scan a wprintf format string to determine whether it's portable across a |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
156 // These threadsafe functions return references to globally unique empty | 136 // These threadsafe functions return references to globally unique empty |
157 // strings. | 137 // strings. |
158 // | 138 // |
159 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. | 139 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. |
160 // There is only one case where you should use these: functions which need to | 140 // There is only one case where you should use these: functions which need to |
161 // return a string by reference (e.g. as a class member accessor), and don't | 141 // return a string by reference (e.g. as a class member accessor), and don't |
162 // have an empty string to use (e.g. in an error case). These should not be | 142 // have an empty string to use (e.g. in an error case). These should not be |
163 // used as initializers, function arguments, or return values for functions | 143 // used as initializers, function arguments, or return values for functions |
164 // which return by value or outparam. | 144 // which return by value or outparam. |
165 BASE_EXPORT const std::string& EmptyString(); | 145 BASE_EXPORT const std::string& EmptyString(); |
166 BASE_EXPORT const std::wstring& EmptyWString(); | |
167 BASE_EXPORT const string16& EmptyString16(); | 146 BASE_EXPORT const string16& EmptyString16(); |
168 | 147 |
169 BASE_EXPORT extern const wchar_t kWhitespaceWide[]; | 148 BASE_EXPORT extern const wchar_t kWhitespaceWide[]; |
170 BASE_EXPORT extern const char16 kWhitespaceUTF16[]; | 149 BASE_EXPORT extern const char16 kWhitespaceUTF16[]; |
171 BASE_EXPORT extern const char kWhitespaceASCII[]; | 150 BASE_EXPORT extern const char kWhitespaceASCII[]; |
172 | 151 |
173 BASE_EXPORT extern const char kUtf8ByteOrderMark[]; | 152 BASE_EXPORT extern const char kUtf8ByteOrderMark[]; |
174 | 153 |
175 // Removes characters in |remove_chars| from anywhere in |input|. Returns true | 154 // Removes characters in |remove_chars| from anywhere in |input|. Returns true |
176 // if any characters were removed. |remove_chars| must be null-terminated. | 155 // if any characters were removed. |remove_chars| must be null-terminated. |
(...skipping 15 matching lines...) Expand all Loading... |
192 const string16& replace_with, | 171 const string16& replace_with, |
193 string16* output); | 172 string16* output); |
194 BASE_EXPORT bool ReplaceChars(const std::string& input, | 173 BASE_EXPORT bool ReplaceChars(const std::string& input, |
195 const char replace_chars[], | 174 const char replace_chars[], |
196 const std::string& replace_with, | 175 const std::string& replace_with, |
197 std::string* output); | 176 std::string* output); |
198 | 177 |
199 // Removes characters in |trim_chars| from the beginning and end of |input|. | 178 // Removes characters in |trim_chars| from the beginning and end of |input|. |
200 // |trim_chars| must be null-terminated. | 179 // |trim_chars| must be null-terminated. |
201 // NOTE: Safe to use the same variable for both |input| and |output|. | 180 // NOTE: Safe to use the same variable for both |input| and |output|. |
202 BASE_EXPORT bool TrimString(const std::wstring& input, | |
203 const wchar_t trim_chars[], | |
204 std::wstring* output); | |
205 BASE_EXPORT bool TrimString(const string16& input, | 181 BASE_EXPORT bool TrimString(const string16& input, |
206 const char16 trim_chars[], | 182 const char16 trim_chars[], |
207 string16* output); | 183 string16* output); |
208 BASE_EXPORT bool TrimString(const std::string& input, | 184 BASE_EXPORT bool TrimString(const std::string& input, |
209 const char trim_chars[], | 185 const char trim_chars[], |
210 std::string* output); | 186 std::string* output); |
211 | 187 |
212 // Truncates a string to the nearest UTF-8 character that will leave | 188 // Truncates a string to the nearest UTF-8 character that will leave |
213 // the string less than or equal to the specified byte size. | 189 // the string less than or equal to the specified byte size. |
214 BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, | 190 BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, |
(...skipping 27 matching lines...) Expand all Loading... |
242 std::string* output); | 218 std::string* output); |
243 | 219 |
244 // Searches for CR or LF characters. Removes all contiguous whitespace | 220 // Searches for CR or LF characters. Removes all contiguous whitespace |
245 // strings that contain them. This is useful when trying to deal with text | 221 // strings that contain them. This is useful when trying to deal with text |
246 // copied from terminals. | 222 // copied from terminals. |
247 // Returns |text|, with the following three transformations: | 223 // Returns |text|, with the following three transformations: |
248 // (1) Leading and trailing whitespace is trimmed. | 224 // (1) Leading and trailing whitespace is trimmed. |
249 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace | 225 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace |
250 // sequences containing a CR or LF are trimmed. | 226 // sequences containing a CR or LF are trimmed. |
251 // (3) All other whitespace sequences are converted to single spaces. | 227 // (3) All other whitespace sequences are converted to single spaces. |
252 BASE_EXPORT std::wstring CollapseWhitespace( | |
253 const std::wstring& text, | |
254 bool trim_sequences_with_line_breaks); | |
255 BASE_EXPORT string16 CollapseWhitespace( | 228 BASE_EXPORT string16 CollapseWhitespace( |
256 const string16& text, | 229 const string16& text, |
257 bool trim_sequences_with_line_breaks); | 230 bool trim_sequences_with_line_breaks); |
258 BASE_EXPORT std::string CollapseWhitespaceASCII( | 231 BASE_EXPORT std::string CollapseWhitespaceASCII( |
259 const std::string& text, | 232 const std::string& text, |
260 bool trim_sequences_with_line_breaks); | 233 bool trim_sequences_with_line_breaks); |
261 | 234 |
262 // Returns true if the passed string is empty or contains only whitespace | 235 // Returns true if the passed string is empty or contains only whitespace |
263 // characters. | 236 // characters. |
264 BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str); | 237 BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str); |
265 BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str); | 238 BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str); |
266 | 239 |
267 // Returns true if |input| is empty or contains only characters found in | 240 // Returns true if |input| is empty or contains only characters found in |
268 // |characters|. | 241 // |characters|. |
269 BASE_EXPORT bool ContainsOnlyChars(const std::wstring& input, | |
270 const std::wstring& characters); | |
271 BASE_EXPORT bool ContainsOnlyChars(const string16& input, | 242 BASE_EXPORT bool ContainsOnlyChars(const string16& input, |
272 const string16& characters); | 243 const string16& characters); |
273 BASE_EXPORT bool ContainsOnlyChars(const std::string& input, | 244 BASE_EXPORT bool ContainsOnlyChars(const std::string& input, |
274 const std::string& characters); | 245 const std::string& characters); |
275 | 246 |
276 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII | 247 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII |
277 // beforehand. | 248 // beforehand. |
278 BASE_EXPORT std::string WideToASCII(const std::wstring& wide); | 249 BASE_EXPORT std::string WideToASCII(const std::wstring& wide); |
279 BASE_EXPORT std::string UTF16ToASCII(const string16& utf16); | 250 BASE_EXPORT std::string UTF16ToASCII(const string16& utf16); |
280 | 251 |
281 // Converts the given wide string to the corresponding Latin1. This will fail | |
282 // (return false) if any characters are more than 255. | |
283 BASE_EXPORT bool WideToLatin1(const std::wstring& wide, std::string* latin1); | |
284 | |
285 // Returns true if the specified string matches the criteria. How can a wide | 252 // Returns true if the specified string matches the criteria. How can a wide |
286 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the | 253 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the |
287 // first case) or characters that use only 8 bits and whose 8-bit | 254 // first case) or characters that use only 8 bits and whose 8-bit |
288 // representation looks like a UTF-8 string (the second case). | 255 // representation looks like a UTF-8 string (the second case). |
289 // | 256 // |
290 // Note that IsStringUTF8 checks not only if the input is structurally | 257 // Note that IsStringUTF8 checks not only if the input is structurally |
291 // valid but also if it doesn't contain any non-character codepoint | 258 // valid but also if it doesn't contain any non-character codepoint |
292 // (e.g. U+FFFE). It's done on purpose because all the existing callers want | 259 // (e.g. U+FFFE). It's done on purpose because all the existing callers want |
293 // to have the maximum 'discriminating' power from other encodings. If | 260 // to have the maximum 'discriminating' power from other encodings. If |
294 // there's a use case for just checking the structural validity, we have to | 261 // there's a use case for just checking the structural validity, we have to |
295 // add a new function for that. | 262 // add a new function for that. |
296 BASE_EXPORT bool IsStringUTF8(const std::string& str); | 263 BASE_EXPORT bool IsStringUTF8(const std::string& str); |
297 BASE_EXPORT bool IsStringASCII(const std::wstring& str); | |
298 BASE_EXPORT bool IsStringASCII(const base::StringPiece& str); | 264 BASE_EXPORT bool IsStringASCII(const base::StringPiece& str); |
299 BASE_EXPORT bool IsStringASCII(const string16& str); | 265 BASE_EXPORT bool IsStringASCII(const string16& str); |
300 | 266 |
301 // Applies base::ToLowerASCII to every element of the string, in place. Uses a | 267 // Applies base::ToLowerASCII to every element of the string, in place. Uses a |
302 // pointer to clearly differentiate it from the non-pointer variant. | 268 // pointer to clearly differentiate it from the non-pointer variant. |
303 template <class str> inline void StringToLowerASCII(str* s) { | 269 template <class str> inline void StringToLowerASCII(str* s) { |
304 for (typename str::iterator i = s->begin(); i != s->end(); ++i) | 270 for (typename str::iterator i = s->begin(); i != s->end(); ++i) |
305 *i = base::ToLowerASCII(*i); | 271 *i = base::ToLowerASCII(*i); |
306 } | 272 } |
307 | 273 |
(...skipping 16 matching lines...) Expand all Loading... |
324 str output(s); | 290 str output(s); |
325 StringToUpperASCII(&output); | 291 StringToUpperASCII(&output); |
326 return output; | 292 return output; |
327 } | 293 } |
328 | 294 |
329 // Compare the lower-case form of the given string against the given ASCII | 295 // Compare the lower-case form of the given string against the given ASCII |
330 // string. This is useful for checking whether an input string matches some | 296 // string. This is useful for checking whether an input string matches some |
331 // token, and it is optimized to avoid intermediate string copies. This API is | 297 // token, and it is optimized to avoid intermediate string copies. This API is |
332 // borrowed from the equivalent APIs in Mozilla. | 298 // borrowed from the equivalent APIs in Mozilla. |
333 BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b); | 299 BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b); |
334 BASE_EXPORT bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); | |
335 BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b); | 300 BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b); |
336 | 301 |
337 // Same thing, but with string iterators instead. | 302 // Same thing, but with string iterators instead. |
338 BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, | 303 BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, |
339 std::string::const_iterator a_end, | 304 std::string::const_iterator a_end, |
340 const char* b); | 305 const char* b); |
341 BASE_EXPORT bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, | |
342 std::wstring::const_iterator a_end, | |
343 const char* b); | |
344 BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin, | 306 BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin, |
345 string16::const_iterator a_end, | 307 string16::const_iterator a_end, |
346 const char* b); | 308 const char* b); |
347 BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, | 309 BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, |
348 const char* a_end, | 310 const char* a_end, |
349 const char* b); | 311 const char* b); |
350 BASE_EXPORT bool LowerCaseEqualsASCII(const wchar_t* a_begin, | |
351 const wchar_t* a_end, | |
352 const char* b); | |
353 BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, | 312 BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, |
354 const char16* a_end, | 313 const char16* a_end, |
355 const char* b); | 314 const char* b); |
356 | 315 |
357 // Performs a case-sensitive string compare. The behavior is undefined if both | 316 // Performs a case-sensitive string compare. The behavior is undefined if both |
358 // strings are not ASCII. | 317 // strings are not ASCII. |
359 BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b); | 318 BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b); |
360 | 319 |
361 // Returns true if str starts with search, or false otherwise. | 320 // Returns true if str starts with search, or false otherwise. |
362 BASE_EXPORT bool StartsWithASCII(const std::string& str, | 321 BASE_EXPORT bool StartsWithASCII(const std::string& str, |
363 const std::string& search, | 322 const std::string& search, |
364 bool case_sensitive); | 323 bool case_sensitive); |
365 BASE_EXPORT bool StartsWith(const std::wstring& str, | |
366 const std::wstring& search, | |
367 bool case_sensitive); | |
368 BASE_EXPORT bool StartsWith(const string16& str, | 324 BASE_EXPORT bool StartsWith(const string16& str, |
369 const string16& search, | 325 const string16& search, |
370 bool case_sensitive); | 326 bool case_sensitive); |
371 | 327 |
372 // Returns true if str ends with search, or false otherwise. | 328 // Returns true if str ends with search, or false otherwise. |
373 BASE_EXPORT bool EndsWith(const std::string& str, | 329 BASE_EXPORT bool EndsWith(const std::string& str, |
374 const std::string& search, | 330 const std::string& search, |
375 bool case_sensitive); | 331 bool case_sensitive); |
376 BASE_EXPORT bool EndsWith(const std::wstring& str, | |
377 const std::wstring& search, | |
378 bool case_sensitive); | |
379 BASE_EXPORT bool EndsWith(const string16& str, | 332 BASE_EXPORT bool EndsWith(const string16& str, |
380 const string16& search, | 333 const string16& search, |
381 bool case_sensitive); | 334 bool case_sensitive); |
382 | 335 |
383 | 336 |
384 // Determines the type of ASCII character, independent of locale (the C | 337 // Determines the type of ASCII character, independent of locale (the C |
385 // library versions will change based on locale). | 338 // library versions will change based on locale). |
386 template <typename Char> | 339 template <typename Char> |
387 inline bool IsAsciiWhitespace(Char c) { | 340 inline bool IsAsciiWhitespace(Char c) { |
388 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; | 341 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
483 str->reserve(length_with_null); | 436 str->reserve(length_with_null); |
484 str->resize(length_with_null - 1); | 437 str->resize(length_with_null - 1); |
485 return &((*str)[0]); | 438 return &((*str)[0]); |
486 } | 439 } |
487 | 440 |
488 //----------------------------------------------------------------------------- | 441 //----------------------------------------------------------------------------- |
489 | 442 |
490 // Splits a string into its fields delimited by any of the characters in | 443 // Splits a string into its fields delimited by any of the characters in |
491 // |delimiters|. Each field is added to the |tokens| vector. Returns the | 444 // |delimiters|. Each field is added to the |tokens| vector. Returns the |
492 // number of tokens found. | 445 // number of tokens found. |
493 BASE_EXPORT size_t Tokenize(const std::wstring& str, | |
494 const std::wstring& delimiters, | |
495 std::vector<std::wstring>* tokens); | |
496 BASE_EXPORT size_t Tokenize(const string16& str, | 446 BASE_EXPORT size_t Tokenize(const string16& str, |
497 const string16& delimiters, | 447 const string16& delimiters, |
498 std::vector<string16>* tokens); | 448 std::vector<string16>* tokens); |
499 BASE_EXPORT size_t Tokenize(const std::string& str, | 449 BASE_EXPORT size_t Tokenize(const std::string& str, |
500 const std::string& delimiters, | 450 const std::string& delimiters, |
501 std::vector<std::string>* tokens); | 451 std::vector<std::string>* tokens); |
502 BASE_EXPORT size_t Tokenize(const base::StringPiece& str, | 452 BASE_EXPORT size_t Tokenize(const base::StringPiece& str, |
503 const base::StringPiece& delimiters, | 453 const base::StringPiece& delimiters, |
504 std::vector<base::StringPiece>* tokens); | 454 std::vector<base::StringPiece>* tokens); |
505 | 455 |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
567 #elif defined(WCHAR_T_IS_UTF32) | 517 #elif defined(WCHAR_T_IS_UTF32) |
568 typedef uint32 Unsigned; | 518 typedef uint32 Unsigned; |
569 #endif | 519 #endif |
570 }; | 520 }; |
// Specialization for short: its unsigned counterpart is unsigned short. | // Specialization for short: its unsigned counterpart is unsigned short. |
571 template<> | 521 template<> |
572 struct ToUnsigned<short> { | 522 struct ToUnsigned<short> { |
573 typedef unsigned short Unsigned; | 523 typedef unsigned short Unsigned; |
574 }; | 524 }; |
575 | 525 |
576 #endif // BASE_STRINGS_STRING_UTIL_H_ | 526 #endif // BASE_STRINGS_STRING_UTIL_H_ |
OLD | NEW |