Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(586)

Side by Side Diff: base/strings/string_util.h

Issue 1641513004: Update //base to chromium 9659b08ea5a34f889dc4166217f438095ddc10d2 (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/strings/string_split_unittest.cc ('k') | base/strings/string_util.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // This file defines utility functions for working with strings. 5 // This file defines utility functions for working with strings.
6 6
7 #ifndef BASE_STRINGS_STRING_UTIL_H_ 7 #ifndef BASE_STRINGS_STRING_UTIL_H_
8 #define BASE_STRINGS_STRING_UTIL_H_ 8 #define BASE_STRINGS_STRING_UTIL_H_
9 9
10 #include <ctype.h> 10 #include <ctype.h>
11 #include <stdarg.h> // va_list 11 #include <stdarg.h> // va_list
12 12
13 #include <string> 13 #include <string>
14 #include <vector> 14 #include <vector>
15 15
16 #include "base/base_export.h" 16 #include "base/base_export.h"
17 #include "base/basictypes.h" 17 #include "base/basictypes.h"
18 #include "base/compiler_specific.h" 18 #include "base/compiler_specific.h"
19 #include "base/strings/string16.h" 19 #include "base/strings/string16.h"
20 #include "base/strings/string_piece.h" // For implicit conversions. 20 #include "base/strings/string_piece.h" // For implicit conversions.
21 21
22 namespace base { 22 namespace base {
23 23
24 // C standard-library functions like "strncasecmp" and "snprintf" that aren't 24 // C standard-library functions that aren't cross-platform are provided as
25 // cross-platform are provided as "base::strncasecmp", and their prototypes 25 // "base::...", and their prototypes are listed below. These functions are
26 // are listed below. These functions are then implemented as inline calls 26 // then implemented as inline calls to the platform-specific equivalents in the
27 // to the platform-specific equivalents in the platform-specific headers. 27 // platform-specific headers.
28
29 // Compares the two strings s1 and s2 without regard to case using
30 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
31 // s2 > s1 according to a lexicographic comparison.
32 int strcasecmp(const char* s1, const char* s2);
33
34 // Compares up to count characters of s1 and s2 without regard to case using
35 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
36 // s2 > s1 according to a lexicographic comparison.
37 int strncasecmp(const char* s1, const char* s2, size_t count);
38
39 // Same as strncmp but for char16 strings.
40 int strncmp16(const char16* s1, const char16* s2, size_t count);
41 28
42 // Wrapper for vsnprintf that always null-terminates and always returns the 29 // Wrapper for vsnprintf that always null-terminates and always returns the
43 // number of characters that would be in an untruncated formatted 30 // number of characters that would be in an untruncated formatted
44 // string, even when truncation occurs. 31 // string, even when truncation occurs.
45 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) 32 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments)
46 PRINTF_FORMAT(3, 0); 33 PRINTF_FORMAT(3, 0);
47 34
48 // Some of these implementations need to be inlined. 35 // Some of these implementations need to be inlined.
49 36
50 // We separate the declaration from the implementation of this inline 37 // We separate the declaration from the implementation of this inline
51 // function just so the PRINTF_FORMAT works. 38 // function just so the PRINTF_FORMAT works.
52 inline int snprintf(char* buffer, size_t size, const char* format, ...) 39 inline int snprintf(char* buffer, size_t size, const char* format, ...)
53 PRINTF_FORMAT(3, 4); 40 PRINTF_FORMAT(3, 4);
54 inline int snprintf(char* buffer, size_t size, const char* format, ...) { 41 inline int snprintf(char* buffer, size_t size, const char* format, ...) {
55 va_list arguments; 42 va_list arguments;
56 va_start(arguments, format); 43 va_start(arguments, format);
57 int result = vsnprintf(buffer, size, format, arguments); 44 int result = vsnprintf(buffer, size, format, arguments);
58 va_end(arguments); 45 va_end(arguments);
59 return result; 46 return result;
60 } 47 }
61 48
49 // TODO(mark) http://crbug.com/472900 crashpad shouldn't use base while
50 // being DEPSed in. This backwards-compat hack is provided until crashpad is
51 // updated.
52 #if defined(OS_WIN)
53 inline int strcasecmp(const char* s1, const char* s2) {
54 return _stricmp(s1, s2);
55 }
56 #else // Posix
57 inline int strcasecmp(const char* string1, const char* string2) {
58 return ::strcasecmp(string1, string2);
59 }
60 #endif
61
62 // BSD-style safe and consistent string copy functions. 62 // BSD-style safe and consistent string copy functions.
63 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. 63 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
64 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as 64 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
65 // long as |dst_size| is not 0. Returns the length of |src| in characters. 65 // long as |dst_size| is not 0. Returns the length of |src| in characters.
66 // If the return value is >= dst_size, then the output was truncated. 66 // If the return value is >= dst_size, then the output was truncated.
67 // NOTE: All sizes are in number of characters, NOT in bytes. 67 // NOTE: All sizes are in number of characters, NOT in bytes.
68 BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); 68 BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size);
69 BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); 69 BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size);
70 70
71 // Scan a wprintf format string to determine whether it's portable across a 71 // Scan a wprintf format string to determine whether it's portable across a
(...skipping 24 matching lines...) Expand all
96 template <class Char> inline Char ToLowerASCII(Char c) { 96 template <class Char> inline Char ToLowerASCII(Char c) {
97 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; 97 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
98 } 98 }
99 99
100 // ASCII-specific toupper. The standard library's toupper is locale sensitive, 100 // ASCII-specific toupper. The standard library's toupper is locale sensitive,
101 // so we don't want to use it here. 101 // so we don't want to use it here.
102 template <class Char> inline Char ToUpperASCII(Char c) { 102 template <class Char> inline Char ToUpperASCII(Char c) {
103 return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; 103 return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c;
104 } 104 }
105 105
106 // Function objects to aid in comparing/searching strings. 106 // Functor for case-insensitive ASCII comparisons for STL algorithms like
107 107 // std::search.
108 template<typename Char> struct CaseInsensitiveCompare { 108 //
109 public: 109 // Note that a full Unicode version of this functor is not possible to write
110 bool operator()(Char x, Char y) const { 110 // because case mappings might change the number of characters, depend on
111 // TODO(darin): Do we really want to do locale sensitive comparisons here? 111 // context (combining accents), and require handling UTF-16. If you need
112 // See http://crbug.com/24917 112 // proper Unicode support, use base::i18n::ToLower/FoldCase and then just
113 return tolower(x) == tolower(y); 113 // use a normal operator== on the result.
114 }
115 };
116
117 template<typename Char> struct CaseInsensitiveCompareASCII { 114 template<typename Char> struct CaseInsensitiveCompareASCII {
118 public: 115 public:
119 bool operator()(Char x, Char y) const { 116 bool operator()(Char x, Char y) const {
120 return ToLowerASCII(x) == ToLowerASCII(y); 117 return ToLowerASCII(x) == ToLowerASCII(y);
121 } 118 }
122 }; 119 };
123 120
121 // Like strcasecmp for case-insensitive ASCII characters only. Returns:
122 // -1 (a < b)
123 // 0 (a == b)
124 // 1 (a > b)
125 // (unlike strcasecmp which can return values greater or less than 1/-1). For
126 // full Unicode support, use base::i18n::ToLower or base::i18n::FoldCase
127 // and then just call the normal string operators on the result.
128 BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b);
129 BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b);
130
131 // Equality for ASCII case-insensitive comparisons. For full Unicode support,
132 // use base::i18n::ToLower or base::i18n::FoldCase and then compare with either
133 // == or !=.
134 BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b);
135 BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b);
136
124 // These threadsafe functions return references to globally unique empty 137 // These threadsafe functions return references to globally unique empty
125 // strings. 138 // strings.
126 // 139 //
127 // It is likely faster to construct a new empty string object (just a few 140 // It is likely faster to construct a new empty string object (just a few
128 // instructions to set the length to 0) than to get the empty string singleton 141 // instructions to set the length to 0) than to get the empty string singleton
129 // returned by these functions (which requires threadsafe singleton access). 142 // returned by these functions (which requires threadsafe singleton access).
130 // 143 //
131 // Therefore, DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT 144 // Therefore, DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT
132 // CONSTRUCTORS. There is only one case where you should use these: functions 145 // CONSTRUCTORS. There is only one case where you should use these: functions
133 // which need to return a string by reference (e.g. as a class member 146 // which need to return a string by reference (e.g. as a class member
134 // accessor), and don't have an empty string to use (e.g. in an error case). 147 // accessor), and don't have an empty string to use (e.g. in an error case).
135 // These should not be used as initializers, function arguments, or return 148 // These should not be used as initializers, function arguments, or return
136 // values for functions which return by value or outparam. 149 // values for functions which return by value or outparam.
137 BASE_EXPORT const std::string& EmptyString(); 150 BASE_EXPORT const std::string& EmptyString();
138 BASE_EXPORT const string16& EmptyString16(); 151 BASE_EXPORT const string16& EmptyString16();
139 152
140 // Contains the set of characters representing whitespace in the corresponding 153 // Contains the set of characters representing whitespace in the corresponding
141 // encoding. Null-terminated. 154 // encoding. Null-terminated. The ASCII versions are the whitespaces as defined
142 BASE_EXPORT extern const wchar_t kWhitespaceWide[]; 155 // by HTML5, and don't include control characters.
143 BASE_EXPORT extern const char16 kWhitespaceUTF16[]; 156 BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode.
157 BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode.
144 BASE_EXPORT extern const char kWhitespaceASCII[]; 158 BASE_EXPORT extern const char kWhitespaceASCII[];
159 BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No unicode.
145 160
146 // Null-terminated string representing the UTF-8 byte order mark. 161 // Null-terminated string representing the UTF-8 byte order mark.
147 BASE_EXPORT extern const char kUtf8ByteOrderMark[]; 162 BASE_EXPORT extern const char kUtf8ByteOrderMark[];
148 163
149 // Removes characters in |remove_chars| from anywhere in |input|. Returns true 164 // Removes characters in |remove_chars| from anywhere in |input|. Returns true
150 // if any characters were removed. |remove_chars| must be null-terminated. 165 // if any characters were removed. |remove_chars| must be null-terminated.
151 // NOTE: Safe to use the same variable for both |input| and |output|. 166 // NOTE: Safe to use the same variable for both |input| and |output|.
152 BASE_EXPORT bool RemoveChars(const string16& input, 167 BASE_EXPORT bool RemoveChars(const string16& input,
153 const base::StringPiece16& remove_chars, 168 const StringPiece16& remove_chars,
154 string16* output); 169 string16* output);
155 BASE_EXPORT bool RemoveChars(const std::string& input, 170 BASE_EXPORT bool RemoveChars(const std::string& input,
156 const base::StringPiece& remove_chars, 171 const StringPiece& remove_chars,
157 std::string* output); 172 std::string* output);
158 173
159 // Replaces characters in |replace_chars| from anywhere in |input| with 174 // Replaces characters in |replace_chars| from anywhere in |input| with
160 // |replace_with|. Each character in |replace_chars| will be replaced with 175 // |replace_with|. Each character in |replace_chars| will be replaced with
161 // the |replace_with| string. Returns true if any characters were replaced. 176 // the |replace_with| string. Returns true if any characters were replaced.
162 // |replace_chars| must be null-terminated. 177 // |replace_chars| must be null-terminated.
163 // NOTE: Safe to use the same variable for both |input| and |output|. 178 // NOTE: Safe to use the same variable for both |input| and |output|.
164 BASE_EXPORT bool ReplaceChars(const string16& input, 179 BASE_EXPORT bool ReplaceChars(const string16& input,
165 const base::StringPiece16& replace_chars, 180 const StringPiece16& replace_chars,
166 const string16& replace_with, 181 const string16& replace_with,
167 string16* output); 182 string16* output);
168 BASE_EXPORT bool ReplaceChars(const std::string& input, 183 BASE_EXPORT bool ReplaceChars(const std::string& input,
169 const base::StringPiece& replace_chars, 184 const StringPiece& replace_chars,
170 const std::string& replace_with, 185 const std::string& replace_with,
171 std::string* output); 186 std::string* output);
172 187
188 enum TrimPositions {
189 TRIM_NONE = 0,
190 TRIM_LEADING = 1 << 0,
191 TRIM_TRAILING = 1 << 1,
192 TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,
193 };
194
173 // Removes characters in |trim_chars| from the beginning and end of |input|. 195 // Removes characters in |trim_chars| from the beginning and end of |input|.
174 // |trim_chars| must be null-terminated. 196 // The 8-bit version only works on 8-bit characters, not UTF-8.
175 // NOTE: Safe to use the same variable for both |input| and |output|. 197 //
198 // It is safe to use the same variable for both |input| and |output| (this is
199 // the normal usage to trim in-place).
176 BASE_EXPORT bool TrimString(const string16& input, 200 BASE_EXPORT bool TrimString(const string16& input,
177 const base::StringPiece16& trim_chars, 201 StringPiece16 trim_chars,
178 string16* output); 202 string16* output);
179 BASE_EXPORT bool TrimString(const std::string& input, 203 BASE_EXPORT bool TrimString(const std::string& input,
180 const base::StringPiece& trim_chars, 204 StringPiece trim_chars,
181 std::string* output); 205 std::string* output);
182 206
207 // StringPiece versions of the above. The returned pieces refer to the original
208 // buffer.
209 BASE_EXPORT StringPiece16 TrimString(StringPiece16 input,
210 const StringPiece16& trim_chars,
211 TrimPositions positions);
212 BASE_EXPORT StringPiece TrimString(StringPiece input,
213 const StringPiece& trim_chars,
214 TrimPositions positions);
215
183 // Truncates a string to the nearest UTF-8 character that will leave 216 // Truncates a string to the nearest UTF-8 character that will leave
184 // the string less than or equal to the specified byte size. 217 // the string less than or equal to the specified byte size.
185 BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, 218 BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input,
186 const size_t byte_size, 219 const size_t byte_size,
187 std::string* output); 220 std::string* output);
188 221
189 // Trims any whitespace from either end of the input string. Returns where 222 // Trims any whitespace from either end of the input string.
190 // whitespace was found. 223 //
191 // The non-wide version has two functions: 224 // The StringPiece versions return a substring referencing the input buffer.
192 // * TrimWhitespaceASCII() 225 // The ASCII versions look only for ASCII whitespace.
193 // This function is for ASCII strings and only looks for ASCII whitespace; 226 //
194 // Please choose the best one according to your usage. 227 // The std::string versions return where whitespace was found.
195 // NOTE: Safe to use the same variable for both input and output. 228 // NOTE: Safe to use the same variable for both input and output.
196 enum TrimPositions {
197 TRIM_NONE = 0,
198 TRIM_LEADING = 1 << 0,
199 TRIM_TRAILING = 1 << 1,
200 TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,
201 };
202 BASE_EXPORT TrimPositions TrimWhitespace(const string16& input, 229 BASE_EXPORT TrimPositions TrimWhitespace(const string16& input,
203 TrimPositions positions, 230 TrimPositions positions,
204 base::string16* output); 231 string16* output);
232 BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input,
233 TrimPositions positions);
205 BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, 234 BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,
206 TrimPositions positions, 235 TrimPositions positions,
207 std::string* output); 236 std::string* output);
237 BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input,
238 TrimPositions positions);
208 239
209 // Deprecated. This function is only for backward compatibility and calls 240 // Deprecated. This function is only for backward compatibility and calls
210 // TrimWhitespaceASCII(). 241 // TrimWhitespaceASCII().
211 BASE_EXPORT TrimPositions TrimWhitespace(const std::string& input, 242 BASE_EXPORT TrimPositions TrimWhitespace(const std::string& input,
212 TrimPositions positions, 243 TrimPositions positions,
213 std::string* output); 244 std::string* output);
214 245
215 // Searches for CR or LF characters. Removes all contiguous whitespace 246 // Searches for CR or LF characters. Removes all contiguous whitespace
216 // strings that contain them. This is useful when trying to deal with text 247 // strings that contain them. This is useful when trying to deal with text
217 // copied from terminals. 248 // copied from terminals.
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
308 const char* b_begin, 339 const char* b_begin,
309 const char* b_end); 340 const char* b_end);
310 BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, 341 BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin,
311 const char16* a_end, 342 const char16* a_end,
312 const char* b); 343 const char* b);
313 344
314 // Performs a case-sensitive string compare. The behavior is undefined if both 345 // Performs a case-sensitive string compare. The behavior is undefined if both
315 // strings are not ASCII. 346 // strings are not ASCII.
316 BASE_EXPORT bool EqualsASCII(const string16& a, const StringPiece& b); 347 BASE_EXPORT bool EqualsASCII(const string16& a, const StringPiece& b);
317 348
318 } // namespace base 349 // Indicates case sensitivity of comparisons. Only ASCII case insensitivity
350 // is supported. Full Unicode case-insensitive conversions would need to go in
351 // base/i18n so it can use ICU.
352 //
353 // If you need to do Unicode-aware case-insensitive StartsWith/EndsWith, it's
354 // best to call base::i18n::ToLower() or base::i18n::FoldCase() (see
355 // base/i18n/case_conversion.h for usage advice) on the arguments, and then use
356 // the results in a case-sensitive comparison.
357 enum class CompareCase {
358 SENSITIVE,
359 INSENSITIVE_ASCII,
360 };
319 361
320 #if defined(OS_WIN) 362 BASE_EXPORT bool StartsWith(StringPiece str,
321 #include "base/strings/string_util_win.h" 363 StringPiece search_for,
322 #elif defined(OS_POSIX) 364 CompareCase case_sensitivity);
323 #include "base/strings/string_util_posix.h" 365 BASE_EXPORT bool StartsWith(StringPiece16 str,
324 #else 366 StringPiece16 search_for,
325 #error Define string operations appropriately for your platform 367 CompareCase case_sensitivity);
326 #endif 368 BASE_EXPORT bool EndsWith(StringPiece str,
369 StringPiece search_for,
370 CompareCase case_sensitivity);
371 BASE_EXPORT bool EndsWith(StringPiece16 str,
372 StringPiece16 search_for,
373 CompareCase case_sensitivity);
327 374
328 // Returns true if str starts with search, or false otherwise. 375 // DEPRECATED. Returns true if str starts/ends with search, or false otherwise.
329 BASE_EXPORT bool StartsWithASCII(const std::string& str, 376 // TODO(brettw) remove in favor of the "enum" versions above.
330 const std::string& search, 377 inline bool StartsWithASCII(const std::string& str,
331 bool case_sensitive); 378 const std::string& search,
332 BASE_EXPORT bool StartsWith(const base::string16& str, 379 bool case_sensitive) {
333 const base::string16& search, 380 return StartsWith(
334 bool case_sensitive); 381 StringPiece(str), StringPiece(search),
335 382 case_sensitive ? CompareCase::SENSITIVE : CompareCase::INSENSITIVE_ASCII);
336 // Returns true if str ends with search, or false otherwise. 383 }
337 BASE_EXPORT bool EndsWith(const std::string& str,
338 const std::string& search,
339 bool case_sensitive);
340 BASE_EXPORT bool EndsWith(const base::string16& str,
341 const base::string16& search,
342 bool case_sensitive);
343
344 384
345 // Determines the type of ASCII character, independent of locale (the C 385 // Determines the type of ASCII character, independent of locale (the C
346 // library versions will change based on locale). 386 // library versions will change based on locale).
347 template <typename Char> 387 template <typename Char>
348 inline bool IsAsciiWhitespace(Char c) { 388 inline bool IsAsciiWhitespace(Char c) {
349 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 389 return c == ' ' || c == '\r' || c == '\n' || c == '\t';
350 } 390 }
351 template <typename Char> 391 template <typename Char>
352 inline bool IsAsciiAlpha(Char c) { 392 inline bool IsAsciiAlpha(Char c) {
353 return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); 393 return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z'));
354 } 394 }
355 template <typename Char> 395 template <typename Char>
356 inline bool IsAsciiDigit(Char c) { 396 inline bool IsAsciiDigit(Char c) {
357 return c >= '0' && c <= '9'; 397 return c >= '0' && c <= '9';
358 } 398 }
359 399
360 template <typename Char> 400 template <typename Char>
361 inline bool IsHexDigit(Char c) { 401 inline bool IsHexDigit(Char c) {
362 return (c >= '0' && c <= '9') || 402 return (c >= '0' && c <= '9') ||
363 (c >= 'A' && c <= 'F') || 403 (c >= 'A' && c <= 'F') ||
364 (c >= 'a' && c <= 'f'); 404 (c >= 'a' && c <= 'f');
365 } 405 }
366 406
367 template <typename Char> 407 // Returns the integer corresponding to the given hex character. For example:
368 inline char HexDigitToInt(Char c) { 408 // '4' -> 4
369 DCHECK(IsHexDigit(c)); 409 // 'a' -> 10
370 if (c >= '0' && c <= '9') 410 // 'B' -> 11
371 return static_cast<char>(c - '0'); 411 // Assumes the input is a valid hex character. DCHECKs in debug builds if not.
372 if (c >= 'A' && c <= 'F') 412 BASE_EXPORT char HexDigitToInt(wchar_t c);
373 return static_cast<char>(c - 'A' + 10);
374 if (c >= 'a' && c <= 'f')
375 return static_cast<char>(c - 'a' + 10);
376 return 0;
377 }
378 413
379 // Returns true if it's a whitespace character. 414 // Returns true if it's a Unicode whitespace character.
380 inline bool IsWhitespace(wchar_t c) { 415 inline bool IsUnicodeWhitespace(wchar_t c) {
381 return wcschr(base::kWhitespaceWide, c) != NULL; 416 return wcschr(base::kWhitespaceWide, c) != NULL;
382 } 417 }
383 418
384 // Return a byte string in human-readable format with a unit suffix. Not 419 // Return a byte string in human-readable format with a unit suffix. Not
385 // appropriate for use in any UI; use of FormatBytes and friends in ui/base is 420 // appropriate for use in any UI; use of FormatBytes and friends in ui/base is
386 // highly recommended instead. TODO(avi): Figure out how to get callers to use 421 // highly recommended instead. TODO(avi): Figure out how to get callers to use
387 // FormatBytes instead; remove this. 422 // FormatBytes instead; remove this.
388 BASE_EXPORT base::string16 FormatBytesUnlocalized(int64 bytes); 423 BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes);
389 424
390 // Starting at |start_offset| (usually 0), replace the first instance of 425 // Starting at |start_offset| (usually 0), replace the first instance of
391 // |find_this| with |replace_with|. 426 // |find_this| with |replace_with|.
392 BASE_EXPORT void ReplaceFirstSubstringAfterOffset( 427 BASE_EXPORT void ReplaceFirstSubstringAfterOffset(base::string16* str,
393 base::string16* str, 428 size_t start_offset,
394 size_t start_offset, 429 StringPiece16 find_this,
395 const base::string16& find_this, 430 StringPiece16 replace_with);
396 const base::string16& replace_with); 431 BASE_EXPORT void ReplaceFirstSubstringAfterOffset(std::string* str,
397 BASE_EXPORT void ReplaceFirstSubstringAfterOffset( 432 size_t start_offset,
398 std::string* str, 433 StringPiece find_this,
399 size_t start_offset, 434 StringPiece replace_with);
400 const std::string& find_this,
401 const std::string& replace_with);
402 435
403 // Starting at |start_offset| (usually 0), look through |str| and replace all 436 // Starting at |start_offset| (usually 0), look through |str| and replace all
404 // instances of |find_this| with |replace_with|. 437 // instances of |find_this| with |replace_with|.
405 // 438 //
406 // This does entire substrings; use std::replace in <algorithm> for single 439 // This does entire substrings; use std::replace in <algorithm> for single
407 // characters, for example: 440 // characters, for example:
408 // std::replace(str.begin(), str.end(), 'a', 'b'); 441 // std::replace(str.begin(), str.end(), 'a', 'b');
409 BASE_EXPORT void ReplaceSubstringsAfterOffset( 442 BASE_EXPORT void ReplaceSubstringsAfterOffset(string16* str,
410 base::string16* str, 443 size_t start_offset,
411 size_t start_offset, 444 StringPiece16 find_this,
412 const base::string16& find_this, 445 StringPiece16 replace_with);
413 const base::string16& replace_with);
414 BASE_EXPORT void ReplaceSubstringsAfterOffset(std::string* str, 446 BASE_EXPORT void ReplaceSubstringsAfterOffset(std::string* str,
415 size_t start_offset, 447 size_t start_offset,
416 const std::string& find_this, 448 StringPiece find_this,
417 const std::string& replace_with); 449 StringPiece replace_with);
418 450
419 // Reserves enough memory in |str| to accommodate |length_with_null| characters, 451 // Reserves enough memory in |str| to accommodate |length_with_null| characters,
420 // sets the size of |str| to |length_with_null - 1| characters, and returns a 452 // sets the size of |str| to |length_with_null - 1| characters, and returns a
421 // pointer to the underlying contiguous array of characters. This is typically 453 // pointer to the underlying contiguous array of characters. This is typically
422 // used when calling a function that writes results into a character array, but 454 // used when calling a function that writes results into a character array, but
423 // the caller wants the data to be managed by a string-like object. It is 455 // the caller wants the data to be managed by a string-like object. It is
424 // convenient in that it can be used inline in the call, and fast in that it 456 // convenient in that it can be used inline in the call, and fast in that it
425 // avoids copying the results of the call from a char* into a string. 457 // avoids copying the results of the call from a char* into a string.
426 // 458 //
427 // |length_with_null| must be at least 2, since otherwise the underlying string 459 // |length_with_null| must be at least 2, since otherwise the underlying string
428 // would have size 0, and trying to access &((*str)[0]) in that case can result 460 // would have size 0, and trying to access &((*str)[0]) in that case can result
429 // in a number of problems. 461 // in a number of problems.
430 // 462 //
431 // Internally, this takes linear time because the resize() call 0-fills the 463 // Internally, this takes linear time because the resize() call 0-fills the
432 // underlying array for potentially all 464 // underlying array for potentially all
433 // (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we 465 // (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we
434 // could avoid this aspect of the resize() call, as we expect the caller to 466 // could avoid this aspect of the resize() call, as we expect the caller to
435 // immediately write over this memory, but there is no other way to set the size 467 // immediately write over this memory, but there is no other way to set the size
436 // of the string, and not doing that will mean people who access |str| rather 468 // of the string, and not doing that will mean people who access |str| rather
437 // than str.c_str() will get back a string of whatever size |str| had on entry 469 // than str.c_str() will get back a string of whatever size |str| had on entry
438 // to this function (probably 0). 470 // to this function (probably 0).
439 template <class string_type> 471 BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null);
440 inline typename string_type::value_type* WriteInto(string_type* str, 472 BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null);
441 size_t length_with_null) { 473 #ifndef OS_WIN
442 DCHECK_GT(length_with_null, 1u); 474 BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null);
443 str->reserve(length_with_null); 475 #endif
444 str->resize(length_with_null - 1);
445 return &((*str)[0]);
446 }
447
448 //-----------------------------------------------------------------------------
449
450 // Splits a string into its fields delimited by any of the characters in
451 // |delimiters|. Each field is added to the |tokens| vector. Returns the
452 // number of tokens found.
453 BASE_EXPORT size_t Tokenize(const base::string16& str,
454 const base::string16& delimiters,
455 std::vector<base::string16>* tokens);
456 BASE_EXPORT size_t Tokenize(const std::string& str,
457 const std::string& delimiters,
458 std::vector<std::string>* tokens);
459 BASE_EXPORT size_t Tokenize(const base::StringPiece& str,
460 const base::StringPiece& delimiters,
461 std::vector<base::StringPiece>* tokens);
462 476
463 // Does the opposite of SplitString(). 477 // Does the opposite of SplitString().
464 BASE_EXPORT base::string16 JoinString(const std::vector<base::string16>& parts, 478 BASE_EXPORT std::string JoinString(const std::vector<std::string>& parts,
465 base::char16 s); 479 StringPiece separator);
466 BASE_EXPORT std::string JoinString( 480 BASE_EXPORT string16 JoinString(const std::vector<string16>& parts,
467 const std::vector<std::string>& parts, char s); 481 StringPiece16 separator);
468
469 // Join |parts| using |separator|.
470 BASE_EXPORT std::string JoinString(
471 const std::vector<std::string>& parts,
472 const std::string& separator);
473 BASE_EXPORT base::string16 JoinString(
474 const std::vector<base::string16>& parts,
475 const base::string16& separator);
476 482
477 // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. 483 // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively.
478 // Additionally, any number of consecutive '$' characters is replaced by that 484 // Additionally, any number of consecutive '$' characters is replaced by that
479 // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be 485 // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
480 // NULL. This only allows you to use up to nine replacements. 486 // NULL. This only allows you to use up to nine replacements.
481 BASE_EXPORT base::string16 ReplaceStringPlaceholders( 487 BASE_EXPORT string16
482 const base::string16& format_string, 488 ReplaceStringPlaceholders(const string16& format_string,
483 const std::vector<base::string16>& subst, 489 const std::vector<string16>& subst,
484 std::vector<size_t>* offsets); 490 std::vector<size_t>* offsets);
485 491
486 BASE_EXPORT std::string ReplaceStringPlaceholders( 492 BASE_EXPORT std::string ReplaceStringPlaceholders(
487 const base::StringPiece& format_string, 493 const StringPiece& format_string,
488 const std::vector<std::string>& subst, 494 const std::vector<std::string>& subst,
489 std::vector<size_t>* offsets); 495 std::vector<size_t>* offsets);
490 496
491 // Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL. 497 // Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL.
492 BASE_EXPORT base::string16 ReplaceStringPlaceholders( 498 BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string,
493 const base::string16& format_string, 499 const string16& a,
494 const base::string16& a, 500 size_t* offset);
495 size_t* offset);
496 501
497 // Returns true if the string passed in matches the pattern. The pattern 502 } // namespace base
498 // string can contain wildcards like * and ? 503
499 // The backslash character (\) is an escape character for * and ? 504 #if defined(OS_WIN)
500 // We limit the patterns to having a max of 16 * or ? characters. 505 #include "base/strings/string_util_win.h"
501 // ? matches 0 or 1 character, while * matches 0 or more characters. 506 #elif defined(OS_POSIX)
502 BASE_EXPORT bool MatchPattern(const base::StringPiece& string, 507 #include "base/strings/string_util_posix.h"
503 const base::StringPiece& pattern); 508 #else
504 BASE_EXPORT bool MatchPattern(const base::string16& string, 509 #error Define string operations appropriately for your platform
505 const base::string16& pattern); 510 #endif
506 511
507 #endif // BASE_STRINGS_STRING_UTIL_H_ 512 #endif // BASE_STRINGS_STRING_UTIL_H_
OLDNEW
« no previous file with comments | « base/strings/string_split_unittest.cc ('k') | base/strings/string_util.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698