OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | |
5 // This file defines utility functions for working with strings. | |
6 | 4 |
7 #ifndef BASE_STRING_UTIL_H_ | 5 #ifndef BASE_STRING_UTIL_STATIC_H_ |
brettw
2011/04/20 16:03:41
Why is this file necessary? It seems confusing to
rvargas (doing something else)
2011/04/20 21:23:58
It seemed the same use case as the base switches.
| |
8 #define BASE_STRING_UTIL_H_ | 6 #define BASE_STRING_UTIL_STATIC_H_ |
9 #pragma once | 7 #pragma once |
10 | 8 |
11 #include <stdarg.h> // va_list | 9 #include "base/basictypes.h" |
12 | 10 |
13 #include <string> | 11 // Avoid including base/string16.h from this file. |
14 #include <vector> | 12 #if defined(WCHAR_T_IS_UTF16) |
15 | 13 typedef wchar_t char16; |
16 #include "base/base_api.h" | 14 #elif defined(WCHAR_T_IS_UTF32) |
17 #include "base/basictypes.h" | 15 typedef uint16 char16; |
18 #include "base/compiler_specific.h" | |
19 #include "base/string16.h" | |
20 #include "base/string_piece.h" // For implicit conversions. | |
21 | |
22 // TODO(brettw) remove this dependency. Previously StringPrintf lived in this | |
23 // file. We need to convert the callers over to using stringprintf.h instead | |
24 // and then remove this. | |
25 #include "base/stringprintf.h" | |
26 | |
27 // Safe standard library wrappers for all platforms. | |
28 | |
29 namespace base { | |
30 | |
31 // C standard-library functions like "strncasecmp" and "snprintf" that aren't | |
32 // cross-platform are provided as "base::strncasecmp", and their prototypes | |
33 // are listed below. These functions are then implemented as inline calls | |
34 // to the platform-specific equivalents in the platform-specific headers. | |
35 | |
36 // Compares the two strings s1 and s2 without regard to case using | |
37 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if | |
38 // s2 > s1 according to a lexicographic comparison. | |
39 BASE_API int strcasecmp(const char* s1, const char* s2); | |
40 | |
41 // Compares up to count characters of s1 and s2 without regard to case using | |
42 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if | |
43 // s2 > s1 according to a lexicographic comparison. | |
44 BASE_API int strncasecmp(const char* s1, const char* s2, size_t count); | |
45 | |
46 // Same as strncmp but for char16 strings. | |
47 BASE_API int strncmp16(const char16* s1, const char16* s2, size_t count); | |
48 | |
49 // Wrapper for vsnprintf that always null-terminates and always returns the | |
50 // number of characters that would be in an untruncated formatted | |
51 // string, even when truncation occurs. | |
52 BASE_API int vsnprintf(char* buffer, size_t size, const char* format, | |
53 va_list arguments) | |
54 PRINTF_FORMAT(3, 0); | |
55 | |
56 // vswprintf always null-terminates, but when truncation occurs, it will either | |
57 // return -1 or the number of characters that would be in an untruncated | |
58 // formatted string. The actual return value depends on the underlying | |
59 // C library's vswprintf implementation. | |
60 BASE_API int vswprintf(wchar_t* buffer, size_t size, | |
61 const wchar_t* format, va_list arguments) | |
62 WPRINTF_FORMAT(3, 0); | |
63 | |
64 // Some of these implementations need to be inlined. | |
65 | |
66 // We separate the declaration from the implementation of this inline | |
67 // function just so the PRINTF_FORMAT works. | |
68 inline int snprintf(char* buffer, size_t size, const char* format, ...) | |
69 PRINTF_FORMAT(3, 4); | |
70 inline int snprintf(char* buffer, size_t size, const char* format, ...) { | |
71 va_list arguments; | |
72 va_start(arguments, format); | |
73 int result = vsnprintf(buffer, size, format, arguments); | |
74 va_end(arguments); | |
75 return result; | |
76 } | |
77 | |
78 // We separate the declaration from the implementation of this inline | |
79 // function just so the WPRINTF_FORMAT works. | |
80 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) | |
81 WPRINTF_FORMAT(3, 4); | |
82 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) { | |
83 va_list arguments; | |
84 va_start(arguments, format); | |
85 int result = vswprintf(buffer, size, format, arguments); | |
86 va_end(arguments); | |
87 return result; | |
88 } | |
89 | |
90 // BSD-style safe and consistent string copy functions. | |
91 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. | |
92 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as | |
93 // long as |dst_size| is not 0. Returns the length of |src| in characters. | |
94 // If the return value is >= dst_size, then the output was truncated. | |
95 // NOTE: All sizes are in number of characters, NOT in bytes. | |
96 BASE_API size_t strlcpy(char* dst, const char* src, size_t dst_size); | |
97 BASE_API size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); | |
98 | |
99 // Scan a wprintf format string to determine whether it's portable across a | |
100 // variety of systems. This function only checks that the conversion | |
101 // specifiers used by the format string are supported and have the same meaning | |
102 // on a variety of systems. It doesn't check for other errors that might occur | |
103 // within a format string. | |
104 // | |
105 // Nonportable conversion specifiers for wprintf are: | |
106 // - 's' and 'c' without an 'l' length modifier. %s and %c operate on char | |
107 // data on all systems except Windows, which treats them as wchar_t data. | |
108 // Use %ls and %lc for wchar_t data instead. | |
109 // - 'S' and 'C', which operate on wchar_t data on all systems except Windows, | |
110 // which treats them as char data. Use %ls and %lc for wchar_t data | |
111 // instead. | |
112 // - 'F', which is not identified by Windows wprintf documentation. | |
113 // - 'D', 'O', and 'U', which are deprecated and not available on all systems. | |
114 // Use %ld, %lo, and %lu instead. | |
115 // | |
116 // Note that there is no portable conversion specifier for char data when | |
117 // working with wprintf. | |
118 // | |
119 // This function is intended to be called from base::vswprintf. | |
120 BASE_API bool IsWprintfFormatPortable(const wchar_t* format); | |
121 | |
122 // ASCII-specific tolower. The standard library's tolower is locale sensitive, | |
123 // so we don't want to use it here. | |
124 template <class Char> inline Char ToLowerASCII(Char c) { | |
125 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; | |
126 } | |
127 | |
128 // ASCII-specific toupper. The standard library's toupper is locale sensitive, | |
129 // so we don't want to use it here. | |
130 template <class Char> inline Char ToUpperASCII(Char c) { | |
131 return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; | |
132 } | |
133 | |
134 // Function objects to aid in comparing/searching strings. | |
135 | |
136 template<typename Char> struct CaseInsensitiveCompare { | |
137 public: | |
138 bool operator()(Char x, Char y) const { | |
139 // TODO(darin): Do we really want to do locale sensitive comparisons here? | |
140 // See http://crbug.com/24917 | |
141 return tolower(x) == tolower(y); | |
142 } | |
143 }; | |
144 | |
145 template<typename Char> struct CaseInsensitiveCompareASCII { | |
146 public: | |
147 bool operator()(Char x, Char y) const { | |
148 return ToLowerASCII(x) == ToLowerASCII(y); | |
149 } | |
150 }; | |
151 | |
152 } // namespace base | |
153 | |
154 #if defined(OS_WIN) | |
155 #include "base/string_util_win.h" | |
156 #elif defined(OS_POSIX) | |
157 #include "base/string_util_posix.h" | |
158 #else | |
159 #error Define string operations appropriately for your platform | |
160 #endif | 16 #endif |
161 | 17 |
162 // These threadsafe functions return references to globally unique empty | |
163 // strings. | |
164 // | |
165 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. | |
166 // There is only one case where you should use these: functions which need to | |
167 // return a string by reference (e.g. as a class member accessor), and don't | |
168 // have an empty string to use (e.g. in an error case). These should not be | |
169 // used as initializers, function arguments, or return values for functions | |
170 // which return by value or outparam. | |
171 BASE_API const std::string& EmptyString(); | |
172 BASE_API const std::wstring& EmptyWString(); | |
173 BASE_API const string16& EmptyString16(); | |
174 | |
175 extern const wchar_t kWhitespaceWide[]; | 18 extern const wchar_t kWhitespaceWide[]; |
176 extern const char16 kWhitespaceUTF16[]; | 19 extern const char16 kWhitespaceUTF16[]; |
177 extern const char kWhitespaceASCII[]; | 20 extern const char kWhitespaceASCII[]; |
178 | |
179 extern const char kUtf8ByteOrderMark[]; | 21 extern const char kUtf8ByteOrderMark[]; |
180 | 22 |
181 // Removes characters in remove_chars from anywhere in input. Returns true if | 23 #endif // BASE_STRING_UTIL_STATIC_H_ |
182 // any characters were removed. | |
183 // NOTE: Safe to use the same variable for both input and output. | |
184 BASE_API bool RemoveChars(const std::wstring& input, | |
185 const wchar_t remove_chars[], | |
186 std::wstring* output); | |
187 BASE_API bool RemoveChars(const string16& input, | |
188 const char16 remove_chars[], | |
189 string16* output); | |
190 BASE_API bool RemoveChars(const std::string& input, | |
191 const char remove_chars[], | |
192 std::string* output); | |
193 | |
194 // Removes characters in trim_chars from the beginning and end of input. | |
195 // NOTE: Safe to use the same variable for both input and output. | |
196 BASE_API bool TrimString(const std::wstring& input, | |
197 const wchar_t trim_chars[], | |
198 std::wstring* output); | |
199 BASE_API bool TrimString(const string16& input, | |
200 const char16 trim_chars[], | |
201 string16* output); | |
202 BASE_API bool TrimString(const std::string& input, | |
203 const char trim_chars[], | |
204 std::string* output); | |
205 | |
206 // Truncates a string to the nearest UTF-8 character that will leave | |
207 // the string less than or equal to the specified byte size. | |
208 BASE_API void TruncateUTF8ToByteSize(const std::string& input, | |
209 const size_t byte_size, | |
210 std::string* output); | |
211 | |
212 // Trims any whitespace from either end of the input string. Returns where | |
213 // whitespace was found. | |
214 // The non-wide (std::string) version has two functions: | |
215 // * TrimWhitespaceASCII() | |
216 // This function is for ASCII strings and only looks for ASCII whitespace. | |
217 // * TrimWhitespace(), which is deprecated and calls TrimWhitespaceASCII(). | |
218 // NOTE: Safe to use the same variable for both input and output. | |
219 enum TrimPositions { | |
220 TRIM_NONE = 0, | |
221 TRIM_LEADING = 1 << 0, | |
222 TRIM_TRAILING = 1 << 1, | |
223 TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, | |
224 }; | |
225 BASE_API TrimPositions TrimWhitespace(const std::wstring& input, | |
226 TrimPositions positions, | |
227 std::wstring* output); | |
228 BASE_API TrimPositions TrimWhitespace(const string16& input, | |
229 TrimPositions positions, | |
230 string16* output); | |
231 BASE_API TrimPositions TrimWhitespaceASCII(const std::string& input, | |
232 TrimPositions positions, | |
233 std::string* output); | |
234 | |
235 // Deprecated. This function is only for backward compatibility and calls | |
236 // TrimWhitespaceASCII(). | |
237 BASE_API TrimPositions TrimWhitespace(const std::string& input, | |
238 TrimPositions positions, | |
239 std::string* output); | |
240 | |
241 // Searches for CR or LF characters. Removes all contiguous whitespace | |
242 // strings that contain them. This is useful when trying to deal with text | |
243 // copied from terminals. | |
244 // Returns |text|, with the following three transformations: | |
245 // (1) Leading and trailing whitespace is trimmed. | |
246 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace | |
247 // sequences containing a CR or LF are trimmed. | |
248 // (3) All other whitespace sequences are converted to single spaces. | |
249 BASE_API std::wstring CollapseWhitespace(const std::wstring& text, | |
250 bool trim_sequences_with_line_breaks); | |
251 BASE_API string16 CollapseWhitespace(const string16& text, | |
252 bool trim_sequences_with_line_breaks); | |
253 BASE_API std::string CollapseWhitespaceASCII( | |
254 const std::string& text, bool trim_sequences_with_line_breaks); | |
255 | |
256 // Returns true if the passed string is empty or contains only white-space | |
257 // characters. | |
258 BASE_API bool ContainsOnlyWhitespaceASCII(const std::string& str); | |
259 BASE_API bool ContainsOnlyWhitespace(const string16& str); | |
260 | |
261 // Returns true if |input| is empty or contains only characters found in | |
262 // |characters|. | |
263 BASE_API bool ContainsOnlyChars(const std::wstring& input, | |
264 const std::wstring& characters); | |
265 BASE_API bool ContainsOnlyChars(const string16& input, | |
266 const string16& characters); | |
267 BASE_API bool ContainsOnlyChars(const std::string& input, | |
268 const std::string& characters); | |
269 | |
270 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII | |
271 // beforehand. | |
272 BASE_API std::string WideToASCII(const std::wstring& wide); | |
273 BASE_API std::string UTF16ToASCII(const string16& utf16); | |
274 | |
275 // Converts the given wide string to the corresponding Latin1. This will fail | |
276 // (return false) if any characters are more than 255. | |
277 BASE_API bool WideToLatin1(const std::wstring& wide, std::string* latin1); | |
278 | |
279 // Returns true if the specified string matches the criteria. How can a wide | |
280 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the | |
281 // first case) or characters that use only 8-bits and whose 8-bit | |
282 // representation looks like a UTF-8 string (the second case). | |
283 // | |
284 // Note that IsStringUTF8 checks not only if the input is structurally | |
285 // valid but also if it doesn't contain any non-character codepoint | |
286 // (e.g. U+FFFE). It's done on purpose because all the existing callers want | |
287 // to have the maximum 'discriminating' power from other encodings. If | |
288 // there's a use case for just checking the structural validity, we have to | |
289 // add a new function for that. | |
290 BASE_API bool IsStringUTF8(const std::string& str); | |
291 BASE_API bool IsStringASCII(const std::wstring& str); | |
292 BASE_API bool IsStringASCII(const base::StringPiece& str); | |
293 BASE_API bool IsStringASCII(const string16& str); | |
294 | |
295 // Converts the elements of the given string. This version uses a pointer to | |
296 // clearly differentiate it from the non-pointer variant. | |
297 template <class str> inline void StringToLowerASCII(str* s) { | |
298 for (typename str::iterator i = s->begin(); i != s->end(); ++i) | |
299 *i = base::ToLowerASCII(*i); | |
300 } | |
301 | |
302 template <class str> inline str StringToLowerASCII(const str& s) { | |
303 // for std::string and std::wstring | |
304 str output(s); | |
305 StringToLowerASCII(&output); | |
306 return output; | |
307 } | |
308 | |
309 // Converts the elements of the given string. This version uses a pointer to | |
310 // clearly differentiate it from the non-pointer variant. | |
311 template <class str> inline void StringToUpperASCII(str* s) { | |
312 for (typename str::iterator i = s->begin(); i != s->end(); ++i) | |
313 *i = base::ToUpperASCII(*i); | |
314 } | |
315 | |
316 template <class str> inline str StringToUpperASCII(const str& s) { | |
317 // for std::string and std::wstring | |
318 str output(s); | |
319 StringToUpperASCII(&output); | |
320 return output; | |
321 } | |
322 | |
323 // Compare the lower-case form of the given string against the given ASCII | |
324 // string. This is useful for checking whether an input string matches some | |
325 // token, and it is optimized to avoid intermediate string copies. This API is | |
326 // borrowed from the equivalent APIs in Mozilla. | |
327 BASE_API bool LowerCaseEqualsASCII(const std::string& a, const char* b); | |
328 BASE_API bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); | |
329 BASE_API bool LowerCaseEqualsASCII(const string16& a, const char* b); | |
330 | |
331 // Same thing, but with string iterators instead. | |
332 BASE_API bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, | |
333 std::string::const_iterator a_end, | |
334 const char* b); | |
335 BASE_API bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, | |
336 std::wstring::const_iterator a_end, | |
337 const char* b); | |
338 BASE_API bool LowerCaseEqualsASCII(string16::const_iterator a_begin, | |
339 string16::const_iterator a_end, | |
340 const char* b); | |
341 BASE_API bool LowerCaseEqualsASCII(const char* a_begin, | |
342 const char* a_end, | |
343 const char* b); | |
344 BASE_API bool LowerCaseEqualsASCII(const wchar_t* a_begin, | |
345 const wchar_t* a_end, | |
346 const char* b); | |
347 BASE_API bool LowerCaseEqualsASCII(const char16* a_begin, | |
348 const char16* a_end, | |
349 const char* b); | |
350 | |
351 // Performs a case-sensitive string compare. The behavior is undefined if both | |
352 // strings are not ASCII. | |
353 BASE_API bool EqualsASCII(const string16& a, const base::StringPiece& b); | |
354 | |
355 // Returns true if str starts with search, or false otherwise. | |
356 BASE_API bool StartsWithASCII(const std::string& str, | |
357 const std::string& search, | |
358 bool case_sensitive); | |
359 BASE_API bool StartsWith(const std::wstring& str, | |
360 const std::wstring& search, | |
361 bool case_sensitive); | |
362 BASE_API bool StartsWith(const string16& str, | |
363 const string16& search, | |
364 bool case_sensitive); | |
365 | |
366 // Returns true if str ends with search, or false otherwise. | |
367 BASE_API bool EndsWith(const std::string& str, | |
368 const std::string& search, | |
369 bool case_sensitive); | |
370 BASE_API bool EndsWith(const std::wstring& str, | |
371 const std::wstring& search, | |
372 bool case_sensitive); | |
373 BASE_API bool EndsWith(const string16& str, | |
374 const string16& search, | |
375 bool case_sensitive); | |
376 | |
377 | |
378 // Determines the type of ASCII character, independent of locale (the C | |
379 // library versions will change based on locale). | |
380 template <typename Char> | |
381 inline bool IsAsciiWhitespace(Char c) { | |
382 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; | |
383 } | |
384 template <typename Char> | |
385 inline bool IsAsciiAlpha(Char c) { | |
386 return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); | |
387 } | |
388 template <typename Char> | |
389 inline bool IsAsciiDigit(Char c) { | |
390 return c >= '0' && c <= '9'; | |
391 } | |
392 | |
393 template <typename Char> | |
394 inline bool IsHexDigit(Char c) { | |
395 return (c >= '0' && c <= '9') || | |
396 (c >= 'A' && c <= 'F') || | |
397 (c >= 'a' && c <= 'f'); | |
398 } | |
399 | |
400 template <typename Char> | |
401 inline Char HexDigitToInt(Char c) { | |
402 DCHECK(IsHexDigit(c)); | |
403 if (c >= '0' && c <= '9') | |
404 return c - '0'; | |
405 if (c >= 'A' && c <= 'F') | |
406 return c - 'A' + 10; | |
407 if (c >= 'a' && c <= 'f') | |
408 return c - 'a' + 10; | |
409 return 0; | |
410 } | |
411 | |
412 // Returns true if it's a whitespace character. | |
413 inline bool IsWhitespace(wchar_t c) { | |
414 return wcschr(kWhitespaceWide, c) != NULL; | |
415 } | |
416 | |
417 enum DataUnits { | |
418 DATA_UNITS_BYTE = 0, | |
419 DATA_UNITS_KIBIBYTE, | |
420 DATA_UNITS_MEBIBYTE, | |
421 DATA_UNITS_GIBIBYTE, | |
422 }; | |
423 | |
424 // Return the unit type that is appropriate for displaying the amount of bytes | |
425 // passed in. | |
426 BASE_API DataUnits GetByteDisplayUnits(int64 bytes); | |
427 | |
428 // Return a byte string in human-readable format, displayed in the units | |
429 // specified by 'units', with an optional unit suffix. | |
430 // Ex: FormatBytes(512, DATA_UNITS_KIBIBYTE, true) => "0.5 KB" | |
431 // Ex: FormatBytes(10*1024, DATA_UNITS_MEBIBYTE, false) => "0.1" | |
432 BASE_API string16 FormatBytes(int64 bytes, DataUnits units, bool show_units); | |
433 | |
434 // As above, but with "/s" units. | |
435 // Ex: FormatSpeed(512, DATA_UNITS_KIBIBYTE, true) => "0.5 KB/s" | |
436 // Ex: FormatSpeed(10*1024, DATA_UNITS_MEBIBYTE, false) => "0.1" | |
437 BASE_API string16 FormatSpeed(int64 bytes, DataUnits units, bool show_units); | |
438 | |
439 // Return a number formatted with separators according to the user's locale. | |
440 // Ex: FormatNumber(1234567) => 1,234,567 | |
441 BASE_API string16 FormatNumber(int64 number); | |
442 | |
443 // Starting at |start_offset| (usually 0), replace the first instance of | |
444 // |find_this| with |replace_with|. | |
445 BASE_API void ReplaceFirstSubstringAfterOffset(string16* str, | |
446 string16::size_type start_offset, | |
447 const string16& find_this, | |
448 const string16& replace_with); | |
449 BASE_API void ReplaceFirstSubstringAfterOffset( | |
450 std::string* str, | |
451 std::string::size_type start_offset, | |
452 const std::string& find_this, | |
453 const std::string& replace_with); | |
454 | |
455 // Starting at |start_offset| (usually 0), look through |str| and replace all | |
456 // instances of |find_this| with |replace_with|. | |
457 // | |
458 // This does entire substrings; use std::replace in <algorithm> for single | |
459 // characters, for example: | |
460 // std::replace(str.begin(), str.end(), 'a', 'b'); | |
461 BASE_API void ReplaceSubstringsAfterOffset(string16* str, | |
462 string16::size_type start_offset, | |
463 const string16& find_this, | |
464 const string16& replace_with); | |
465 BASE_API void ReplaceSubstringsAfterOffset(std::string* str, | |
466 std::string::size_type start_offset, | |
467 const std::string& find_this, | |
468 const std::string& replace_with); | |
469 | |
470 // This is mpcomplete's pattern for saving a string copy when dealing with | |
471 // a function that writes results into a wchar_t[] and wanting the result to | |
472 // end up in a std::wstring. It ensures that the std::wstring's internal | |
473 // buffer has enough room to store the characters to be written into it, and | |
474 // sets its .length() attribute to the right value. | |
475 // | |
476 // The reserve() call allocates the memory required to hold the string | |
477 // plus a terminating null. This is done because resize() isn't | |
478 // guaranteed to reserve space for the null. The resize() call is | |
479 // simply the only way to change the string's 'length' member. | |
480 // | |
481 // XXX-performance: the call to wide.resize() takes linear time, since it fills | |
482 // the string's buffer with nulls. I call it to change the length of the | |
483 // string (needed because writing directly to the buffer doesn't do this). | |
484 // Perhaps there's a constant-time way to change the string's length. | |
485 template <class string_type> | |
486 inline typename string_type::value_type* WriteInto(string_type* str, | |
487 size_t length_with_null) { | |
488 str->reserve(length_with_null); | |
489 str->resize(length_with_null - 1); | |
490 return &((*str)[0]); | |
491 } | |
492 | |
493 //----------------------------------------------------------------------------- | |
494 | |
495 // Splits a string into its fields delimited by any of the characters in | |
496 // |delimiters|. Each field is added to the |tokens| vector. Returns the | |
497 // number of tokens found. | |
498 BASE_API size_t Tokenize(const std::wstring& str, | |
499 const std::wstring& delimiters, | |
500 std::vector<std::wstring>* tokens); | |
501 BASE_API size_t Tokenize(const string16& str, | |
502 const string16& delimiters, | |
503 std::vector<string16>* tokens); | |
504 BASE_API size_t Tokenize(const std::string& str, | |
505 const std::string& delimiters, | |
506 std::vector<std::string>* tokens); | |
507 BASE_API size_t Tokenize(const base::StringPiece& str, | |
508 const base::StringPiece& delimiters, | |
509 std::vector<base::StringPiece>* tokens); | |
510 | |
511 // Does the opposite of SplitString(). | |
512 BASE_API string16 JoinString(const std::vector<string16>& parts, char16 s); | |
513 BASE_API std::string JoinString(const std::vector<std::string>& parts, char s); | |
514 | |
515 // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. | |
516 // Additionally, any number of consecutive '$' characters is replaced by that | |
517 // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be | |
518 // NULL. This only allows you to use up to nine replacements. | |
519 BASE_API string16 ReplaceStringPlaceholders(const string16& format_string, | |
520 const std::vector<string16>& subst, | |
521 std::vector<size_t>* offsets); | |
522 | |
523 BASE_API std::string ReplaceStringPlaceholders( | |
524 const base::StringPiece& format_string, | |
525 const std::vector<std::string>& subst, | |
526 std::vector<size_t>* offsets); | |
527 | |
528 // Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL. | |
529 BASE_API string16 ReplaceStringPlaceholders(const string16& format_string, | |
530 const string16& a, | |
531 size_t* offset); | |
532 | |
533 // Returns true if the string passed in matches the pattern. The pattern | |
534 // string can contain wildcards like * and ? | |
535 // The backslash character (\) is an escape character for * and ? | |
536 // We limit the patterns to having a max of 16 * or ? characters. | |
537 // ? matches 0 or 1 character, while * matches 0 or more characters. | |
538 BASE_API bool MatchPattern(const base::StringPiece& string, | |
539 const base::StringPiece& pattern); | |
540 BASE_API bool MatchPattern(const string16& string, const string16& pattern); | |
541 | |
542 // Hack to convert any char-like type to its unsigned counterpart. | |
543 // For example, it will convert char, signed char and unsigned char to unsigned | |
544 // char. | |
545 template<typename T> | |
546 struct ToUnsigned { | |
547 typedef T Unsigned; | |
548 }; | |
549 | |
550 template<> | |
551 struct ToUnsigned<char> { | |
552 typedef unsigned char Unsigned; | |
553 }; | |
554 template<> | |
555 struct ToUnsigned<signed char> { | |
556 typedef unsigned char Unsigned; | |
557 }; | |
558 template<> | |
559 struct ToUnsigned<wchar_t> { | |
560 #if defined(WCHAR_T_IS_UTF16) | |
561 typedef unsigned short Unsigned; | |
562 #elif defined(WCHAR_T_IS_UTF32) | |
563 typedef uint32 Unsigned; | |
564 #endif | |
565 }; | |
566 template<> | |
567 struct ToUnsigned<short> { | |
568 typedef unsigned short Unsigned; | |
569 }; | |
570 | |
571 #endif // BASE_STRING_UTIL_H_ | |
OLD | NEW |