Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(11)

Side by Side Diff: base/strings/string_util.h

Issue 80813002: Cleanup: Remove many unused wstring string_utils. (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | base/strings/string_util.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // This file defines utility functions for working with strings. 5 // This file defines utility functions for working with strings.
6 6
7 #ifndef BASE_STRINGS_STRING_UTIL_H_ 7 #ifndef BASE_STRINGS_STRING_UTIL_H_
8 #define BASE_STRINGS_STRING_UTIL_H_ 8 #define BASE_STRINGS_STRING_UTIL_H_
9 9
10 #include <ctype.h> 10 #include <ctype.h>
(...skipping 29 matching lines...) Expand all
40 40
41 // Same as strncmp but for char16 strings. 41 // Same as strncmp but for char16 strings.
42 int strncmp16(const char16* s1, const char16* s2, size_t count); 42 int strncmp16(const char16* s1, const char16* s2, size_t count);
43 43
44 // Wrapper for vsnprintf that always null-terminates and always returns the 44 // Wrapper for vsnprintf that always null-terminates and always returns the
45 // number of characters that would be in an untruncated formatted 45 // number of characters that would be in an untruncated formatted
46 // string, even when truncation occurs. 46 // string, even when truncation occurs.
47 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) 47 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments)
48 PRINTF_FORMAT(3, 0); 48 PRINTF_FORMAT(3, 0);
49 49
50 // vswprintf always null-terminates, but when truncation occurs, it will either
51 // return -1 or the number of characters that would be in an untruncated
52 // formatted string. The actual return value depends on the underlying
53 // C library's vswprintf implementation.
54 int vswprintf(wchar_t* buffer, size_t size,
55 const wchar_t* format, va_list arguments)
56 WPRINTF_FORMAT(3, 0);
57
58 // Some of these implementations need to be inlined. 50 // Some of these implementations need to be inlined.
59 51
60 // We separate the declaration from the implementation of this inline 52 // We separate the declaration from the implementation of this inline
61 // function just so the PRINTF_FORMAT works. 53 // function just so the PRINTF_FORMAT works.
62 inline int snprintf(char* buffer, size_t size, const char* format, ...) 54 inline int snprintf(char* buffer, size_t size, const char* format, ...)
63 PRINTF_FORMAT(3, 4); 55 PRINTF_FORMAT(3, 4);
64 inline int snprintf(char* buffer, size_t size, const char* format, ...) { 56 inline int snprintf(char* buffer, size_t size, const char* format, ...) {
65 va_list arguments; 57 va_list arguments;
66 va_start(arguments, format); 58 va_start(arguments, format);
67 int result = vsnprintf(buffer, size, format, arguments); 59 int result = vsnprintf(buffer, size, format, arguments);
68 va_end(arguments); 60 va_end(arguments);
69 return result; 61 return result;
70 } 62 }
71 63
72 // We separate the declaration from the implementation of this inline
73 // function just so the WPRINTF_FORMAT works.
74 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...)
75 WPRINTF_FORMAT(3, 4);
76 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) {
77 va_list arguments;
78 va_start(arguments, format);
79 int result = vswprintf(buffer, size, format, arguments);
80 va_end(arguments);
81 return result;
82 }
83
84 // BSD-style safe and consistent string copy functions. 64 // BSD-style safe and consistent string copy functions.
85 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. 65 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
86 // Copies at most |dst_size|-1 characters, and always NULL-terminates |dst|, as 66 // Copies at most |dst_size|-1 characters, and always NULL-terminates |dst|, as
87 // long as |dst_size| is not 0. Returns the length of |src| in characters. 67 // long as |dst_size| is not 0. Returns the length of |src| in characters.
88 // If the return value is >= dst_size, then the output was truncated. 68 // If the return value is >= dst_size, then the output was truncated.
89 // NOTE: All sizes are in number of characters, NOT in bytes. 69 // NOTE: All sizes are in number of characters, NOT in bytes.
90 BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); 70 BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size);
91 BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); 71 BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size);
92 72
93 // Scan a wprintf format string to determine whether it's portable across a 73 // Scan a wprintf format string to determine whether it's portable across a
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
156 // These threadsafe functions return references to globally unique empty 136 // These threadsafe functions return references to globally unique empty
157 // strings. 137 // strings.
158 // 138 //
159 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. 139 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS.
160 // There is only one case where you should use these: functions which need to 140 // There is only one case where you should use these: functions which need to
161 // return a string by reference (e.g. as a class member accessor), and don't 141 // return a string by reference (e.g. as a class member accessor), and don't
162 // have an empty string to use (e.g. in an error case). These should not be 142 // have an empty string to use (e.g. in an error case). These should not be
163 // used as initializers, function arguments, or return values for functions 143 // used as initializers, function arguments, or return values for functions
164 // which return by value or outparam. 144 // which return by value or outparam.
165 BASE_EXPORT const std::string& EmptyString(); 145 BASE_EXPORT const std::string& EmptyString();
166 BASE_EXPORT const std::wstring& EmptyWString();
167 BASE_EXPORT const string16& EmptyString16(); 146 BASE_EXPORT const string16& EmptyString16();
168 147
169 BASE_EXPORT extern const wchar_t kWhitespaceWide[]; 148 BASE_EXPORT extern const wchar_t kWhitespaceWide[];
170 BASE_EXPORT extern const char16 kWhitespaceUTF16[]; 149 BASE_EXPORT extern const char16 kWhitespaceUTF16[];
171 BASE_EXPORT extern const char kWhitespaceASCII[]; 150 BASE_EXPORT extern const char kWhitespaceASCII[];
172 151
173 BASE_EXPORT extern const char kUtf8ByteOrderMark[]; 152 BASE_EXPORT extern const char kUtf8ByteOrderMark[];
174 153
175 // Removes characters in |remove_chars| from anywhere in |input|. Returns true 154 // Removes characters in |remove_chars| from anywhere in |input|. Returns true
176 // if any characters were removed. |remove_chars| must be null-terminated. 155 // if any characters were removed. |remove_chars| must be null-terminated.
(...skipping 15 matching lines...) Expand all
192 const string16& replace_with, 171 const string16& replace_with,
193 string16* output); 172 string16* output);
194 BASE_EXPORT bool ReplaceChars(const std::string& input, 173 BASE_EXPORT bool ReplaceChars(const std::string& input,
195 const char replace_chars[], 174 const char replace_chars[],
196 const std::string& replace_with, 175 const std::string& replace_with,
197 std::string* output); 176 std::string* output);
198 177
199 // Removes characters in |trim_chars| from the beginning and end of |input|. 178 // Removes characters in |trim_chars| from the beginning and end of |input|.
200 // |trim_chars| must be null-terminated. 179 // |trim_chars| must be null-terminated.
201 // NOTE: Safe to use the same variable for both |input| and |output|. 180 // NOTE: Safe to use the same variable for both |input| and |output|.
202 BASE_EXPORT bool TrimString(const std::wstring& input,
203 const wchar_t trim_chars[],
204 std::wstring* output);
205 BASE_EXPORT bool TrimString(const string16& input, 181 BASE_EXPORT bool TrimString(const string16& input,
206 const char16 trim_chars[], 182 const char16 trim_chars[],
207 string16* output); 183 string16* output);
208 BASE_EXPORT bool TrimString(const std::string& input, 184 BASE_EXPORT bool TrimString(const std::string& input,
209 const char trim_chars[], 185 const char trim_chars[],
210 std::string* output); 186 std::string* output);
211 187
212 // Truncates a string to the nearest UTF-8 character that will leave 188 // Truncates a string to the nearest UTF-8 character that will leave
213 // the string less than or equal to the specified byte size. 189 // the string less than or equal to the specified byte size.
214 BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, 190 BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input,
(...skipping 27 matching lines...) Expand all
242 std::string* output); 218 std::string* output);
243 219
244 // Searches for CR or LF characters. Removes all contiguous whitespace 220 // Searches for CR or LF characters. Removes all contiguous whitespace
245 // strings that contain them. This is useful when trying to deal with text 221 // strings that contain them. This is useful when trying to deal with text
246 // copied from terminals. 222 // copied from terminals.
247 // Returns |text|, with the following three transformations: 223 // Returns |text|, with the following three transformations:
248 // (1) Leading and trailing whitespace is trimmed. 224 // (1) Leading and trailing whitespace is trimmed.
249 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace 225 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
250 // sequences containing a CR or LF are trimmed. 226 // sequences containing a CR or LF are trimmed.
251 // (3) All other whitespace sequences are converted to single spaces. 227 // (3) All other whitespace sequences are converted to single spaces.
252 BASE_EXPORT std::wstring CollapseWhitespace(
253 const std::wstring& text,
254 bool trim_sequences_with_line_breaks);
255 BASE_EXPORT string16 CollapseWhitespace( 228 BASE_EXPORT string16 CollapseWhitespace(
256 const string16& text, 229 const string16& text,
257 bool trim_sequences_with_line_breaks); 230 bool trim_sequences_with_line_breaks);
258 BASE_EXPORT std::string CollapseWhitespaceASCII( 231 BASE_EXPORT std::string CollapseWhitespaceASCII(
259 const std::string& text, 232 const std::string& text,
260 bool trim_sequences_with_line_breaks); 233 bool trim_sequences_with_line_breaks);
261 234
262 // Returns true if the passed string is empty or contains only white-space 235 // Returns true if the passed string is empty or contains only white-space
263 // characters. 236 // characters.
264 BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str); 237 BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str);
265 BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str); 238 BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str);
266 239
267 // Returns true if |input| is empty or contains only characters found in 240 // Returns true if |input| is empty or contains only characters found in
268 // |characters|. 241 // |characters|.
269 BASE_EXPORT bool ContainsOnlyChars(const std::wstring& input,
270 const std::wstring& characters);
271 BASE_EXPORT bool ContainsOnlyChars(const string16& input, 242 BASE_EXPORT bool ContainsOnlyChars(const string16& input,
272 const string16& characters); 243 const string16& characters);
273 BASE_EXPORT bool ContainsOnlyChars(const std::string& input, 244 BASE_EXPORT bool ContainsOnlyChars(const std::string& input,
274 const std::string& characters); 245 const std::string& characters);
275 246
276 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII 247 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII
277 // beforehand. 248 // beforehand.
278 BASE_EXPORT std::string WideToASCII(const std::wstring& wide); 249 BASE_EXPORT std::string WideToASCII(const std::wstring& wide);
279 BASE_EXPORT std::string UTF16ToASCII(const string16& utf16); 250 BASE_EXPORT std::string UTF16ToASCII(const string16& utf16);
280 251
281 // Converts the given wide string to the corresponding Latin1. This will fail
282 // (return false) if any characters are more than 255.
283 BASE_EXPORT bool WideToLatin1(const std::wstring& wide, std::string* latin1);
284
285 // Returns true if the specified string matches the criteria. How can a wide 252 // Returns true if the specified string matches the criteria. How can a wide
286 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the 253 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the
287 // first case) or characters that use only 8-bits and whose 8-bit 254 // first case) or characters that use only 8-bits and whose 8-bit
288 // representation looks like a UTF-8 string (the second case). 255 // representation looks like a UTF-8 string (the second case).
289 // 256 //
290 // Note that IsStringUTF8 checks not only if the input is structurally 257 // Note that IsStringUTF8 checks not only if the input is structurally
291 // valid but also if it doesn't contain any non-character codepoint 258 // valid but also if it doesn't contain any non-character codepoint
292 // (e.g. U+FFFE). It's done on purpose because all the existing callers want 259 // (e.g. U+FFFE). It's done on purpose because all the existing callers want
293 // to have the maximum 'discriminating' power from other encodings. If 260 // to have the maximum 'discriminating' power from other encodings. If
294 // there's a use case for just checking the structural validity, we have to 261 // there's a use case for just checking the structural validity, we have to
295 // add a new function for that. 262 // add a new function for that.
296 BASE_EXPORT bool IsStringUTF8(const std::string& str); 263 BASE_EXPORT bool IsStringUTF8(const std::string& str);
297 BASE_EXPORT bool IsStringASCII(const std::wstring& str);
298 BASE_EXPORT bool IsStringASCII(const base::StringPiece& str); 264 BASE_EXPORT bool IsStringASCII(const base::StringPiece& str);
299 BASE_EXPORT bool IsStringASCII(const string16& str); 265 BASE_EXPORT bool IsStringASCII(const string16& str);
300 266
301 // Converts the elements of the given string. This version uses a pointer to 267 // Converts the elements of the given string. This version uses a pointer to
302 // clearly differentiate it from the non-pointer variant. 268 // clearly differentiate it from the non-pointer variant.
303 template <class str> inline void StringToLowerASCII(str* s) { 269 template <class str> inline void StringToLowerASCII(str* s) {
304 for (typename str::iterator i = s->begin(); i != s->end(); ++i) 270 for (typename str::iterator i = s->begin(); i != s->end(); ++i)
305 *i = base::ToLowerASCII(*i); 271 *i = base::ToLowerASCII(*i);
306 } 272 }
307 273
(...skipping 16 matching lines...) Expand all
324 str output(s); 290 str output(s);
325 StringToUpperASCII(&output); 291 StringToUpperASCII(&output);
326 return output; 292 return output;
327 } 293 }
328 294
329 // Compare the lower-case form of the given string against the given ASCII 295 // Compare the lower-case form of the given string against the given ASCII
330 // string. This is useful for checking whether an input string matches some 296 // string. This is useful for checking whether an input string matches some
331 // token, and it is optimized to avoid intermediate string copies. This API is 297 // token, and it is optimized to avoid intermediate string copies. This API is
332 // borrowed from the equivalent APIs in Mozilla. 298 // borrowed from the equivalent APIs in Mozilla.
333 BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b); 299 BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b);
334 BASE_EXPORT bool LowerCaseEqualsASCII(const std::wstring& a, const char* b);
335 BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b); 300 BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b);
336 301
337 // Same thing, but with string iterators instead. 302 // Same thing, but with string iterators instead.
338 BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, 303 BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
339 std::string::const_iterator a_end, 304 std::string::const_iterator a_end,
340 const char* b); 305 const char* b);
341 BASE_EXPORT bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
342 std::wstring::const_iterator a_end,
343 const char* b);
344 BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin, 306 BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
345 string16::const_iterator a_end, 307 string16::const_iterator a_end,
346 const char* b); 308 const char* b);
347 BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, 309 BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin,
348 const char* a_end, 310 const char* a_end,
349 const char* b); 311 const char* b);
350 BASE_EXPORT bool LowerCaseEqualsASCII(const wchar_t* a_begin,
351 const wchar_t* a_end,
352 const char* b);
353 BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, 312 BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin,
354 const char16* a_end, 313 const char16* a_end,
355 const char* b); 314 const char* b);
356 315
357 // Performs a case-sensitive string compare. The behavior is undefined if both 316 // Performs a case-sensitive string compare. The behavior is undefined if both
358 // strings are not ASCII. 317 // strings are not ASCII.
359 BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b); 318 BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b);
360 319
361 // Returns true if str starts with search, or false otherwise. 320 // Returns true if str starts with search, or false otherwise.
362 BASE_EXPORT bool StartsWithASCII(const std::string& str, 321 BASE_EXPORT bool StartsWithASCII(const std::string& str,
363 const std::string& search, 322 const std::string& search,
364 bool case_sensitive); 323 bool case_sensitive);
365 BASE_EXPORT bool StartsWith(const std::wstring& str,
366 const std::wstring& search,
367 bool case_sensitive);
368 BASE_EXPORT bool StartsWith(const string16& str, 324 BASE_EXPORT bool StartsWith(const string16& str,
369 const string16& search, 325 const string16& search,
370 bool case_sensitive); 326 bool case_sensitive);
371 327
372 // Returns true if str ends with search, or false otherwise. 328 // Returns true if str ends with search, or false otherwise.
373 BASE_EXPORT bool EndsWith(const std::string& str, 329 BASE_EXPORT bool EndsWith(const std::string& str,
374 const std::string& search, 330 const std::string& search,
375 bool case_sensitive); 331 bool case_sensitive);
376 BASE_EXPORT bool EndsWith(const std::wstring& str,
377 const std::wstring& search,
378 bool case_sensitive);
379 BASE_EXPORT bool EndsWith(const string16& str, 332 BASE_EXPORT bool EndsWith(const string16& str,
380 const string16& search, 333 const string16& search,
381 bool case_sensitive); 334 bool case_sensitive);
382 335
383 336
384 // Determines the type of ASCII character, independent of locale (the C 337 // Determines the type of ASCII character, independent of locale (the C
385 // library versions will change based on locale). 338 // library versions will change based on locale).
386 template <typename Char> 339 template <typename Char>
387 inline bool IsAsciiWhitespace(Char c) { 340 inline bool IsAsciiWhitespace(Char c) {
388 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 341 return c == ' ' || c == '\r' || c == '\n' || c == '\t';
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
483 str->reserve(length_with_null); 436 str->reserve(length_with_null);
484 str->resize(length_with_null - 1); 437 str->resize(length_with_null - 1);
485 return &((*str)[0]); 438 return &((*str)[0]);
486 } 439 }
487 440
488 //----------------------------------------------------------------------------- 441 //-----------------------------------------------------------------------------
489 442
490 // Splits a string into its fields delimited by any of the characters in 443 // Splits a string into its fields delimited by any of the characters in
491 // |delimiters|. Each field is added to the |tokens| vector. Returns the 444 // |delimiters|. Each field is added to the |tokens| vector. Returns the
492 // number of tokens found. 445 // number of tokens found.
493 BASE_EXPORT size_t Tokenize(const std::wstring& str,
494 const std::wstring& delimiters,
495 std::vector<std::wstring>* tokens);
496 BASE_EXPORT size_t Tokenize(const string16& str, 446 BASE_EXPORT size_t Tokenize(const string16& str,
497 const string16& delimiters, 447 const string16& delimiters,
498 std::vector<string16>* tokens); 448 std::vector<string16>* tokens);
499 BASE_EXPORT size_t Tokenize(const std::string& str, 449 BASE_EXPORT size_t Tokenize(const std::string& str,
500 const std::string& delimiters, 450 const std::string& delimiters,
501 std::vector<std::string>* tokens); 451 std::vector<std::string>* tokens);
502 BASE_EXPORT size_t Tokenize(const base::StringPiece& str, 452 BASE_EXPORT size_t Tokenize(const base::StringPiece& str,
503 const base::StringPiece& delimiters, 453 const base::StringPiece& delimiters,
504 std::vector<base::StringPiece>* tokens); 454 std::vector<base::StringPiece>* tokens);
505 455
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
567 #elif defined(WCHAR_T_IS_UTF32) 517 #elif defined(WCHAR_T_IS_UTF32)
568 typedef uint32 Unsigned; 518 typedef uint32 Unsigned;
569 #endif 519 #endif
570 }; 520 };
571 template<> 521 template<>
572 struct ToUnsigned<short> { 522 struct ToUnsigned<short> {
573 typedef unsigned short Unsigned; 523 typedef unsigned short Unsigned;
574 }; 524 };
575 525
576 #endif // BASE_STRINGS_STRING_UTIL_H_ 526 #endif // BASE_STRINGS_STRING_UTIL_H_
OLDNEW
« no previous file with comments | « no previous file | base/strings/string_util.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698