OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // This file defines utility functions for working with strings. | 5 // This file defines utility functions for working with strings. |
6 | 6 |
7 #ifndef BASE_STRING_UTIL_H_ | 7 #ifndef BASE_STRING_UTIL_H_ |
8 #define BASE_STRING_UTIL_H_ | 8 #define BASE_STRING_UTIL_H_ |
9 #pragma once | 9 #pragma once |
10 | 10 |
11 #include <stdarg.h> // va_list | 11 #include <stdarg.h> // va_list |
12 | 12 |
13 #include <string> | 13 #include <string> |
14 #include <vector> | 14 #include <vector> |
15 | 15 |
| 16 #include "base/base_api.h" |
16 #include "base/basictypes.h" | 17 #include "base/basictypes.h" |
17 #include "base/compiler_specific.h" | 18 #include "base/compiler_specific.h" |
18 #include "base/string16.h" | 19 #include "base/string16.h" |
19 #include "base/string_piece.h" // For implicit conversions. | 20 #include "base/string_piece.h" // For implicit conversions. |
20 | 21 |
21 // TODO(brettw) remove this dependency. Previously StringPrintf lived in this | 22 // TODO(brettw) remove this dependency. Previously StringPrintf lived in this |
22 // file. We need to convert the callers over to using stringprintf.h instead | 23 // file. We need to convert the callers over to using stringprintf.h instead |
23 // and then remove this. | 24 // and then remove this. |
24 #include "base/stringprintf.h" | 25 #include "base/stringprintf.h" |
25 | 26 |
26 // Safe standard library wrappers for all platforms. | 27 // Safe standard library wrappers for all platforms. |
27 | 28 |
28 namespace base { | 29 namespace base { |
29 | 30 |
30 // C standard-library functions like "strncasecmp" and "snprintf" that aren't | 31 // C standard-library functions like "strncasecmp" and "snprintf" that aren't |
31 // cross-platform are provided as "base::strncasecmp", and their prototypes | 32 // cross-platform are provided as "base::strncasecmp", and their prototypes |
32 // are listed below. These functions are then implemented as inline calls | 33 // are listed below. These functions are then implemented as inline calls |
33 // to the platform-specific equivalents in the platform-specific headers. | 34 // to the platform-specific equivalents in the platform-specific headers. |
34 | 35 |
35 // Compares the two strings s1 and s2 without regard to case using | 36 // Compares the two strings s1 and s2 without regard to case using |
36 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if | 37 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if |
37 // s2 > s1 according to a lexicographic comparison. | 38 // s2 > s1 according to a lexicographic comparison. |
38 int strcasecmp(const char* s1, const char* s2); | 39 BASE_API int strcasecmp(const char* s1, const char* s2); |
39 | 40 |
40 // Compares up to count characters of s1 and s2 without regard to case using | 41 // Compares up to count characters of s1 and s2 without regard to case using |
41 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if | 42 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if |
42 // s2 > s1 according to a lexicographic comparison. | 43 // s2 > s1 according to a lexicographic comparison. |
43 int strncasecmp(const char* s1, const char* s2, size_t count); | 44 BASE_API int strncasecmp(const char* s1, const char* s2, size_t count); |
44 | 45 |
45 // Same as strncmp but for char16 strings. | 46 // Same as strncmp but for char16 strings. |
46 int strncmp16(const char16* s1, const char16* s2, size_t count); | 47 BASE_API int strncmp16(const char16* s1, const char16* s2, size_t count); |
47 | 48 |
48 // Wrapper for vsnprintf that always null-terminates and always returns the | 49 // Wrapper for vsnprintf that always null-terminates and always returns the |
49 // number of characters that would be in an untruncated formatted | 50 // number of characters that would be in an untruncated formatted |
50 // string, even when truncation occurs. | 51 // string, even when truncation occurs. |
51 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) | 52 BASE_API int vsnprintf(char* buffer, size_t size, const char* format, |
| 53 va_list arguments) |
52 PRINTF_FORMAT(3, 0); | 54 PRINTF_FORMAT(3, 0); |
53 | 55 |
54 // vswprintf always null-terminates, but when truncation occurs, it will either | 56 // vswprintf always null-terminates, but when truncation occurs, it will either |
55 // return -1 or the number of characters that would be in an untruncated | 57 // return -1 or the number of characters that would be in an untruncated |
56 // formatted string. The actual return value depends on the underlying | 58 // formatted string. The actual return value depends on the underlying |
57 // C library's vswprintf implementation. | 59 // C library's vswprintf implementation. |
58 int vswprintf(wchar_t* buffer, size_t size, | 60 BASE_API int vswprintf(wchar_t* buffer, size_t size, |
59 const wchar_t* format, va_list arguments) WPRINTF_FORMAT(3, 0); | 61 const wchar_t* format, va_list arguments) |
| 62 WPRINTF_FORMAT(3, 0); |
60 | 63 |
61 // Some of these implementations need to be inlined. | 64 // Some of these implementations need to be inlined. |
62 | 65 |
63 // We separate the declaration from the implementation of this inline | 66 // We separate the declaration from the implementation of this inline |
64 // function just so the PRINTF_FORMAT works. | 67 // function just so the PRINTF_FORMAT works. |
65 inline int snprintf(char* buffer, size_t size, const char* format, ...) | 68 inline int snprintf(char* buffer, size_t size, const char* format, ...) |
66 PRINTF_FORMAT(3, 4); | 69 PRINTF_FORMAT(3, 4); |
67 inline int snprintf(char* buffer, size_t size, const char* format, ...) { | 70 inline int snprintf(char* buffer, size_t size, const char* format, ...) { |
68 va_list arguments; | 71 va_list arguments; |
69 va_start(arguments, format); | 72 va_start(arguments, format); |
(...skipping 13 matching lines...) Expand all Loading... |
83 va_end(arguments); | 86 va_end(arguments); |
84 return result; | 87 return result; |
85 } | 88 } |
86 | 89 |
87 // BSD-style safe and consistent string copy functions. | 90 // BSD-style safe and consistent string copy functions. |
88 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. | 91 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. |
89 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as | 92 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as |
90 // long as |dst_size| is not 0. Returns the length of |src| in characters. | 93 // long as |dst_size| is not 0. Returns the length of |src| in characters. |
91 // If the return value is >= dst_size, then the output was truncated. | 94 // If the return value is >= dst_size, then the output was truncated. |
92 // NOTE: All sizes are in number of characters, NOT in bytes. | 95 // NOTE: All sizes are in number of characters, NOT in bytes. |
93 size_t strlcpy(char* dst, const char* src, size_t dst_size); | 96 BASE_API size_t strlcpy(char* dst, const char* src, size_t dst_size); |
94 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); | 97 BASE_API size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); |
95 | 98 |
96 // Scan a wprintf format string to determine whether it's portable across a | 99 // Scan a wprintf format string to determine whether it's portable across a |
97 // variety of systems. This function only checks that the conversion | 100 // variety of systems. This function only checks that the conversion |
98 // specifiers used by the format string are supported and have the same meaning | 101 // specifiers used by the format string are supported and have the same meaning |
99 // on a variety of systems. It doesn't check for other errors that might occur | 102 // on a variety of systems. It doesn't check for other errors that might occur |
100 // within a format string. | 103 // within a format string. |
101 // | 104 // |
102 // Nonportable conversion specifiers for wprintf are: | 105 // Nonportable conversion specifiers for wprintf are: |
103 // - 's' and 'c' without an 'l' length modifier. %s and %c operate on char | 106 // - 's' and 'c' without an 'l' length modifier. %s and %c operate on char |
104 // data on all systems except Windows, which treats them as wchar_t data. | 107 // data on all systems except Windows, which treats them as wchar_t data. |
105 // Use %ls and %lc for wchar_t data instead. | 108 // Use %ls and %lc for wchar_t data instead. |
106 // - 'S' and 'C', which operate on wchar_t data on all systems except Windows, | 109 // - 'S' and 'C', which operate on wchar_t data on all systems except Windows, |
107 // which treats them as char data. Use %ls and %lc for wchar_t data | 110 // which treats them as char data. Use %ls and %lc for wchar_t data |
108 // instead. | 111 // instead. |
109 // - 'F', which is not identified by Windows wprintf documentation. | 112 // - 'F', which is not identified by Windows wprintf documentation. |
110 // - 'D', 'O', and 'U', which are deprecated and not available on all systems. | 113 // - 'D', 'O', and 'U', which are deprecated and not available on all systems. |
111 // Use %ld, %lo, and %lu instead. | 114 // Use %ld, %lo, and %lu instead. |
112 // | 115 // |
113 // Note that there is no portable conversion specifier for char data when | 116 // Note that there is no portable conversion specifier for char data when |
114 // working with wprintf. | 117 // working with wprintf. |
115 // | 118 // |
116 // This function is intended to be called from base::vswprintf. | 119 // This function is intended to be called from base::vswprintf. |
117 bool IsWprintfFormatPortable(const wchar_t* format); | 120 BASE_API bool IsWprintfFormatPortable(const wchar_t* format); |
118 | 121 |
119 // ASCII-specific tolower. The standard library's tolower is locale sensitive, | 122 // ASCII-specific tolower. The standard library's tolower is locale sensitive, |
120 // so we don't want to use it here. | 123 // so we don't want to use it here. |
121 template <class Char> inline Char ToLowerASCII(Char c) { | 124 template <class Char> inline Char ToLowerASCII(Char c) { |
122 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; | 125 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; |
123 } | 126 } |
124 | 127 |
125 // ASCII-specific toupper. The standard library's toupper is locale sensitive, | 128 // ASCII-specific toupper. The standard library's toupper is locale sensitive, |
126 // so we don't want to use it here. | 129 // so we don't want to use it here. |
127 template <class Char> inline Char ToUpperASCII(Char c) { | 130 template <class Char> inline Char ToUpperASCII(Char c) { |
(...skipping 30 matching lines...) Expand all Loading... |
158 | 161 |
159 // These threadsafe functions return references to globally unique empty | 162 // These threadsafe functions return references to globally unique empty |
160 // strings. | 163 // strings. |
161 // | 164 // |
162 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. | 165 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. |
163 // There is only one case where you should use these: functions which need to | 166 // There is only one case where you should use these: functions which need to |
164 // return a string by reference (e.g. as a class member accessor), and don't | 167 // return a string by reference (e.g. as a class member accessor), and don't |
165 // have an empty string to use (e.g. in an error case). These should not be | 168 // have an empty string to use (e.g. in an error case). These should not be |
166 // used as initializers, function arguments, or return values for functions | 169 // used as initializers, function arguments, or return values for functions |
167 // which return by value or outparam. | 170 // which return by value or outparam. |
168 const std::string& EmptyString(); | 171 BASE_API const std::string& EmptyString(); |
169 const std::wstring& EmptyWString(); | 172 BASE_API const std::wstring& EmptyWString(); |
170 const string16& EmptyString16(); | 173 BASE_API const string16& EmptyString16(); |
171 | 174 |
172 extern const wchar_t kWhitespaceWide[]; | 175 extern const wchar_t kWhitespaceWide[]; |
173 extern const char16 kWhitespaceUTF16[]; | 176 extern const char16 kWhitespaceUTF16[]; |
174 extern const char kWhitespaceASCII[]; | 177 extern const char kWhitespaceASCII[]; |
175 | 178 |
176 extern const char kUtf8ByteOrderMark[]; | 179 extern const char kUtf8ByteOrderMark[]; |
177 | 180 |
178 // Removes characters in remove_chars from anywhere in input. Returns true if | 181 // Removes characters in remove_chars from anywhere in input. Returns true if |
179 // any characters were removed. | 182 // any characters were removed. |
180 // NOTE: Safe to use the same variable for both input and output. | 183 // NOTE: Safe to use the same variable for both input and output. |
181 bool RemoveChars(const std::wstring& input, | 184 BASE_API bool RemoveChars(const std::wstring& input, |
182 const wchar_t remove_chars[], | 185 const wchar_t remove_chars[], |
183 std::wstring* output); | 186 std::wstring* output); |
184 bool RemoveChars(const string16& input, | 187 BASE_API bool RemoveChars(const string16& input, |
185 const char16 remove_chars[], | 188 const char16 remove_chars[], |
186 string16* output); | 189 string16* output); |
187 bool RemoveChars(const std::string& input, | 190 BASE_API bool RemoveChars(const std::string& input, |
188 const char remove_chars[], | 191 const char remove_chars[], |
189 std::string* output); | 192 std::string* output); |
190 | 193 |
191 // Removes characters in trim_chars from the beginning and end of input. | 194 // Removes characters in trim_chars from the beginning and end of input. |
192 // NOTE: Safe to use the same variable for both input and output. | 195 // NOTE: Safe to use the same variable for both input and output. |
193 bool TrimString(const std::wstring& input, | 196 BASE_API bool TrimString(const std::wstring& input, |
194 const wchar_t trim_chars[], | 197 const wchar_t trim_chars[], |
195 std::wstring* output); | 198 std::wstring* output); |
196 bool TrimString(const string16& input, | 199 BASE_API bool TrimString(const string16& input, |
197 const char16 trim_chars[], | 200 const char16 trim_chars[], |
198 string16* output); | 201 string16* output); |
199 bool TrimString(const std::string& input, | 202 BASE_API bool TrimString(const std::string& input, |
200 const char trim_chars[], | 203 const char trim_chars[], |
201 std::string* output); | 204 std::string* output); |
202 | 205 |
203 // Truncates a string to the nearest UTF-8 character that will leave | 206 // Truncates a string to the nearest UTF-8 character that will leave |
204 // the string less than or equal to the specified byte size. | 207 // the string less than or equal to the specified byte size. |
205 void TruncateUTF8ToByteSize(const std::string& input, | 208 BASE_API void TruncateUTF8ToByteSize(const std::string& input, |
206 const size_t byte_size, | 209 const size_t byte_size, |
207 std::string* output); | 210 std::string* output); |
208 | 211 |
209 // Trims any whitespace from either end of the input string. Returns where | 212 // Trims any whitespace from either end of the input string. Returns where |
210 // whitespace was found. | 213 // whitespace was found. |
211 // The non-wide version has two functions: | 214 // The non-wide version has two functions: |
212 // * TrimWhitespaceASCII() | 215 // * TrimWhitespaceASCII() |
213 // This function is for ASCII strings and only looks for ASCII whitespace; | 216 // This function is for ASCII strings and only looks for ASCII whitespace; |
214 // Please choose the best one according to your usage. | 217 // Please choose the best one according to your usage. |
215 // NOTE: Safe to use the same variable for both input and output. | 218 // NOTE: Safe to use the same variable for both input and output. |
216 enum TrimPositions { | 219 enum TrimPositions { |
217 TRIM_NONE = 0, | 220 TRIM_NONE = 0, |
218 TRIM_LEADING = 1 << 0, | 221 TRIM_LEADING = 1 << 0, |
219 TRIM_TRAILING = 1 << 1, | 222 TRIM_TRAILING = 1 << 1, |
220 TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, | 223 TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, |
221 }; | 224 }; |
222 TrimPositions TrimWhitespace(const std::wstring& input, | 225 BASE_API TrimPositions TrimWhitespace(const std::wstring& input, |
223 TrimPositions positions, | 226 TrimPositions positions, |
224 std::wstring* output); | 227 std::wstring* output); |
225 TrimPositions TrimWhitespace(const string16& input, | 228 BASE_API TrimPositions TrimWhitespace(const string16& input, |
226 TrimPositions positions, | 229 TrimPositions positions, |
227 string16* output); | 230 string16* output); |
228 TrimPositions TrimWhitespaceASCII(const std::string& input, | 231 BASE_API TrimPositions TrimWhitespaceASCII(const std::string& input, |
229 TrimPositions positions, | 232 TrimPositions positions, |
230 std::string* output); | 233 std::string* output); |
231 | 234 |
232 // Deprecated. This function is only for backward compatibility and calls | 235 // Deprecated. This function is only for backward compatibility and calls |
233 // TrimWhitespaceASCII(). | 236 // TrimWhitespaceASCII(). |
234 TrimPositions TrimWhitespace(const std::string& input, | 237 BASE_API TrimPositions TrimWhitespace(const std::string& input, |
235 TrimPositions positions, | 238 TrimPositions positions, |
236 std::string* output); | 239 std::string* output); |
237 | 240 |
238 // Searches for CR or LF characters. Removes all contiguous whitespace | 241 // Searches for CR or LF characters. Removes all contiguous whitespace |
239 // strings that contain them. This is useful when trying to deal with text | 242 // strings that contain them. This is useful when trying to deal with text |
240 // copied from terminals. | 243 // copied from terminals. |
241 // Returns |text|, with the following three transformations: | 244 // Returns |text|, with the following three transformations: |
242 // (1) Leading and trailing whitespace is trimmed. | 245 // (1) Leading and trailing whitespace is trimmed. |
243 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace | 246 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace |
244 // sequences containing a CR or LF are trimmed. | 247 // sequences containing a CR or LF are trimmed. |
245 // (3) All other whitespace sequences are converted to single spaces. | 248 // (3) All other whitespace sequences are converted to single spaces. |
246 std::wstring CollapseWhitespace(const std::wstring& text, | 249 BASE_API std::wstring CollapseWhitespace(const std::wstring& text, |
247 bool trim_sequences_with_line_breaks); | 250 bool trim_sequences_with_line_breaks); |
248 string16 CollapseWhitespace(const string16& text, | 251 BASE_API string16 CollapseWhitespace(const string16& text, |
249 bool trim_sequences_with_line_breaks); | 252 bool trim_sequences_with_line_breaks); |
250 std::string CollapseWhitespaceASCII(const std::string& text, | 253 BASE_API std::string CollapseWhitespaceASCII( |
251 bool trim_sequences_with_line_breaks); | 254 const std::string& text, bool trim_sequences_with_line_breaks); |
252 | 255 |
253 // Returns true if the passed string is empty or contains only white-space | 256 // Returns true if the passed string is empty or contains only white-space |
254 // characters. | 257 // characters. |
255 bool ContainsOnlyWhitespaceASCII(const std::string& str); | 258 BASE_API bool ContainsOnlyWhitespaceASCII(const std::string& str); |
256 bool ContainsOnlyWhitespace(const string16& str); | 259 BASE_API bool ContainsOnlyWhitespace(const string16& str); |
257 | 260 |
258 // Returns true if |input| is empty or contains only characters found in | 261 // Returns true if |input| is empty or contains only characters found in |
259 // |characters|. | 262 // |characters|. |
260 bool ContainsOnlyChars(const std::wstring& input, | 263 BASE_API bool ContainsOnlyChars(const std::wstring& input, |
261 const std::wstring& characters); | 264 const std::wstring& characters); |
262 bool ContainsOnlyChars(const string16& input, const string16& characters); | 265 BASE_API bool ContainsOnlyChars(const string16& input, |
263 bool ContainsOnlyChars(const std::string& input, const std::string& characters); | 266 const string16& characters); |
| 267 BASE_API bool ContainsOnlyChars(const std::string& input, |
| 268 const std::string& characters); |
264 | 269 |
265 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII | 270 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII |
266 // beforehand. | 271 // beforehand. |
267 std::string WideToASCII(const std::wstring& wide); | 272 BASE_API std::string WideToASCII(const std::wstring& wide); |
268 std::string UTF16ToASCII(const string16& utf16); | 273 BASE_API std::string UTF16ToASCII(const string16& utf16); |
269 | 274 |
270 // Converts the given wide string to the corresponding Latin1. This will fail | 275 // Converts the given wide string to the corresponding Latin1. This will fail |
271 // (return false) if any characters are more than 255. | 276 // (return false) if any characters are more than 255. |
272 bool WideToLatin1(const std::wstring& wide, std::string* latin1); | 277 BASE_API bool WideToLatin1(const std::wstring& wide, std::string* latin1); |
273 | 278 |
274 // Returns true if the specified string matches the criteria. How can a wide | 279 // Returns true if the specified string matches the criteria. How can a wide |
275 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the | 280 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the |
276 // first case) or characters that use only 8-bits and whose 8-bit | 281 // first case) or characters that use only 8-bits and whose 8-bit |
277 // representation looks like a UTF-8 string (the second case). | 282 // representation looks like a UTF-8 string (the second case). |
278 // | 283 // |
279 // Note that IsStringUTF8 checks not only if the input is structurally | 284 // Note that IsStringUTF8 checks not only if the input is structurally |
280 // valid but also if it doesn't contain any non-character codepoint | 285 // valid but also if it doesn't contain any non-character codepoint |
281 // (e.g. U+FFFE). It's done on purpose because all the existing callers want | 286 // (e.g. U+FFFE). It's done on purpose because all the existing callers want |
282 // to have the maximum 'discriminating' power from other encodings. If | 287 // to have the maximum 'discriminating' power from other encodings. If |
283 // there's a use case for just checking the structural validity, we have to | 288 // there's a use case for just checking the structural validity, we have to |
284 // add a new function for that. | 289 // add a new function for that. |
285 bool IsStringUTF8(const std::string& str); | 290 BASE_API bool IsStringUTF8(const std::string& str); |
286 bool IsStringASCII(const std::wstring& str); | 291 BASE_API bool IsStringASCII(const std::wstring& str); |
287 bool IsStringASCII(const base::StringPiece& str); | 292 BASE_API bool IsStringASCII(const base::StringPiece& str); |
288 bool IsStringASCII(const string16& str); | 293 BASE_API bool IsStringASCII(const string16& str); |
289 | 294 |
290 // Converts the elements of the given string. This version uses a pointer to | 295 // Converts the elements of the given string. This version uses a pointer to |
291 // clearly differentiate it from the non-pointer variant. | 296 // clearly differentiate it from the non-pointer variant. |
292 template <class str> inline void StringToLowerASCII(str* s) { | 297 template <class str> inline void StringToLowerASCII(str* s) { |
293 for (typename str::iterator i = s->begin(); i != s->end(); ++i) | 298 for (typename str::iterator i = s->begin(); i != s->end(); ++i) |
294 *i = base::ToLowerASCII(*i); | 299 *i = base::ToLowerASCII(*i); |
295 } | 300 } |
296 | 301 |
297 template <class str> inline str StringToLowerASCII(const str& s) { | 302 template <class str> inline str StringToLowerASCII(const str& s) { |
298 // for std::string and std::wstring | 303 // for std::string and std::wstring |
(...skipping 13 matching lines...) Expand all Loading... |
312 // for std::string and std::wstring | 317 // for std::string and std::wstring |
313 str output(s); | 318 str output(s); |
314 StringToUpperASCII(&output); | 319 StringToUpperASCII(&output); |
315 return output; | 320 return output; |
316 } | 321 } |
317 | 322 |
318 // Compare the lower-case form of the given string against the given ASCII | 323 // Compare the lower-case form of the given string against the given ASCII |
319 // string. This is useful for checking whether an input string matches some | 324 // string. This is useful for checking whether an input string matches some |
320 // token, and it is optimized to avoid intermediate string copies. This API is | 325 // token, and it is optimized to avoid intermediate string copies. This API is |
321 // borrowed from the equivalent APIs in Mozilla. | 326 // borrowed from the equivalent APIs in Mozilla. |
322 bool LowerCaseEqualsASCII(const std::string& a, const char* b); | 327 BASE_API bool LowerCaseEqualsASCII(const std::string& a, const char* b); |
323 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); | 328 BASE_API bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); |
324 bool LowerCaseEqualsASCII(const string16& a, const char* b); | 329 BASE_API bool LowerCaseEqualsASCII(const string16& a, const char* b); |
325 | 330 |
326 // Same thing, but with string iterators instead. | 331 // Same thing, but with string iterators instead. |
327 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, | 332 BASE_API bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, |
328 std::string::const_iterator a_end, | 333 std::string::const_iterator a_end, |
329 const char* b); | 334 const char* b); |
330 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, | 335 BASE_API bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, |
331 std::wstring::const_iterator a_end, | 336 std::wstring::const_iterator a_end, |
332 const char* b); | 337 const char* b); |
333 bool LowerCaseEqualsASCII(string16::const_iterator a_begin, | 338 BASE_API bool LowerCaseEqualsASCII(string16::const_iterator a_begin, |
334 string16::const_iterator a_end, | 339 string16::const_iterator a_end, |
335 const char* b); | 340 const char* b); |
336 bool LowerCaseEqualsASCII(const char* a_begin, | 341 BASE_API bool LowerCaseEqualsASCII(const char* a_begin, |
337 const char* a_end, | 342 const char* a_end, |
338 const char* b); | 343 const char* b); |
339 bool LowerCaseEqualsASCII(const wchar_t* a_begin, | 344 BASE_API bool LowerCaseEqualsASCII(const wchar_t* a_begin, |
340 const wchar_t* a_end, | 345 const wchar_t* a_end, |
341 const char* b); | 346 const char* b); |
342 bool LowerCaseEqualsASCII(const char16* a_begin, | 347 BASE_API bool LowerCaseEqualsASCII(const char16* a_begin, |
343 const char16* a_end, | 348 const char16* a_end, |
344 const char* b); | 349 const char* b); |
345 | 350 |
346 // Performs a case-sensitive string compare. The behavior is undefined if both | 351 // Performs a case-sensitive string compare. The behavior is undefined if both |
347 // strings are not ASCII. | 352 // strings are not ASCII. |
348 bool EqualsASCII(const string16& a, const base::StringPiece& b); | 353 BASE_API bool EqualsASCII(const string16& a, const base::StringPiece& b); |
349 | 354 |
350 // Returns true if str starts with search, or false otherwise. | 355 // Returns true if str starts with search, or false otherwise. |
351 bool StartsWithASCII(const std::string& str, | 356 BASE_API bool StartsWithASCII(const std::string& str, |
352 const std::string& search, | 357 const std::string& search, |
353 bool case_sensitive); | 358 bool case_sensitive); |
354 bool StartsWith(const std::wstring& str, | 359 BASE_API bool StartsWith(const std::wstring& str, |
355 const std::wstring& search, | 360 const std::wstring& search, |
356 bool case_sensitive); | 361 bool case_sensitive); |
357 bool StartsWith(const string16& str, | 362 BASE_API bool StartsWith(const string16& str, |
358 const string16& search, | 363 const string16& search, |
359 bool case_sensitive); | 364 bool case_sensitive); |
360 | 365 |
361 // Returns true if str ends with search, or false otherwise. | 366 // Returns true if str ends with search, or false otherwise. |
362 bool EndsWith(const std::string& str, | 367 BASE_API bool EndsWith(const std::string& str, |
363 const std::string& search, | 368 const std::string& search, |
364 bool case_sensitive); | 369 bool case_sensitive); |
365 bool EndsWith(const std::wstring& str, | 370 BASE_API bool EndsWith(const std::wstring& str, |
366 const std::wstring& search, | 371 const std::wstring& search, |
367 bool case_sensitive); | 372 bool case_sensitive); |
368 bool EndsWith(const string16& str, | 373 BASE_API bool EndsWith(const string16& str, |
369 const string16& search, | 374 const string16& search, |
370 bool case_sensitive); | 375 bool case_sensitive); |
371 | 376 |
372 | 377 |
373 // Determines the type of ASCII character, independent of locale (the C | 378 // Determines the type of ASCII character, independent of locale (the C |
374 // library versions will change based on locale). | 379 // library versions will change based on locale). |
375 template <typename Char> | 380 template <typename Char> |
376 inline bool IsAsciiWhitespace(Char c) { | 381 inline bool IsAsciiWhitespace(Char c) { |
377 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; | 382 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; |
378 } | 383 } |
379 template <typename Char> | 384 template <typename Char> |
380 inline bool IsAsciiAlpha(Char c) { | 385 inline bool IsAsciiAlpha(Char c) { |
(...skipping 30 matching lines...) Expand all Loading... |
411 | 416 |
412 enum DataUnits { | 417 enum DataUnits { |
413 DATA_UNITS_BYTE = 0, | 418 DATA_UNITS_BYTE = 0, |
414 DATA_UNITS_KIBIBYTE, | 419 DATA_UNITS_KIBIBYTE, |
415 DATA_UNITS_MEBIBYTE, | 420 DATA_UNITS_MEBIBYTE, |
416 DATA_UNITS_GIBIBYTE, | 421 DATA_UNITS_GIBIBYTE, |
417 }; | 422 }; |
418 | 423 |
419 // Return the unit type that is appropriate for displaying the number of bytes | 424 // Return the unit type that is appropriate for displaying the number of bytes |
420 // passed in. | 425 // passed in. |
421 DataUnits GetByteDisplayUnits(int64 bytes); | 426 BASE_API DataUnits GetByteDisplayUnits(int64 bytes); |
422 | 427 |
423 // Return a byte string in human-readable format, displayed in the units | 428 // Return a byte string in human-readable format, displayed in the units |
424 // specified by 'units', with an optional unit suffix. | 429 // specified by 'units', with an optional unit suffix. |
425 // Ex: FormatBytes(512, DATA_UNITS_KIBIBYTE, true) => "0.5 KB" | 430 // Ex: FormatBytes(512, DATA_UNITS_KIBIBYTE, true) => "0.5 KB" |
426 // Ex: FormatBytes(10*1024, DATA_UNITS_MEBIBYTE, false) => "0.1" | 431 // Ex: FormatBytes(10*1024, DATA_UNITS_MEBIBYTE, false) => "0.1" |
427 string16 FormatBytes(int64 bytes, DataUnits units, bool show_units); | 432 BASE_API string16 FormatBytes(int64 bytes, DataUnits units, bool show_units); |
428 | 433 |
429 // As above, but with "/s" units. | 434 // As above, but with "/s" units. |
430 // Ex: FormatSpeed(512, DATA_UNITS_KIBIBYTE, true) => "0.5 KB/s" | 435 // Ex: FormatSpeed(512, DATA_UNITS_KIBIBYTE, true) => "0.5 KB/s" |
431 // Ex: FormatSpeed(10*1024, DATA_UNITS_MEBIBYTE, false) => "0.1" | 436 // Ex: FormatSpeed(10*1024, DATA_UNITS_MEBIBYTE, false) => "0.1" |
432 string16 FormatSpeed(int64 bytes, DataUnits units, bool show_units); | 437 BASE_API string16 FormatSpeed(int64 bytes, DataUnits units, bool show_units); |
433 | 438 |
434 // Return a number formatted with separators according to the user's locale. | 439 // Return a number formatted with separators according to the user's locale. |
435 // Ex: FormatNumber(1234567) => 1,234,567 | 440 // Ex: FormatNumber(1234567) => 1,234,567 |
436 string16 FormatNumber(int64 number); | 441 BASE_API string16 FormatNumber(int64 number); |
437 | 442 |
438 // Starting at |start_offset| (usually 0), replace the first instance of | 443 // Starting at |start_offset| (usually 0), replace the first instance of |
439 // |find_this| with |replace_with|. | 444 // |find_this| with |replace_with|. |
440 void ReplaceFirstSubstringAfterOffset(string16* str, | 445 BASE_API void ReplaceFirstSubstringAfterOffset(string16* str, |
441 string16::size_type start_offset, | 446 string16::size_type start_offset, |
442 const string16& find_this, | 447 const string16& find_this, |
443 const string16& replace_with); | 448 const string16& replace_with); |
444 void ReplaceFirstSubstringAfterOffset(std::string* str, | 449 BASE_API void ReplaceFirstSubstringAfterOffset( |
445 std::string::size_type start_offset, | 450 std::string* str, |
446 const std::string& find_this, | 451 std::string::size_type start_offset, |
447 const std::string& replace_with); | 452 const std::string& find_this, |
| 453 const std::string& replace_with); |
448 | 454 |
449 // Starting at |start_offset| (usually 0), look through |str| and replace all | 455 // Starting at |start_offset| (usually 0), look through |str| and replace all |
450 // instances of |find_this| with |replace_with|. | 456 // instances of |find_this| with |replace_with|. |
451 // | 457 // |
452 // This does entire substrings; use std::replace in <algorithm> for single | 458 // This does entire substrings; use std::replace in <algorithm> for single |
453 // characters, for example: | 459 // characters, for example: |
454 // std::replace(str.begin(), str.end(), 'a', 'b'); | 460 // std::replace(str.begin(), str.end(), 'a', 'b'); |
455 void ReplaceSubstringsAfterOffset(string16* str, | 461 BASE_API void ReplaceSubstringsAfterOffset(string16* str, |
456 string16::size_type start_offset, | 462 string16::size_type start_offset, |
457 const string16& find_this, | 463 const string16& find_this, |
458 const string16& replace_with); | 464 const string16& replace_with); |
459 void ReplaceSubstringsAfterOffset(std::string* str, | 465 BASE_API void ReplaceSubstringsAfterOffset(std::string* str, |
460 std::string::size_type start_offset, | 466 std::string::size_type start_offset, |
461 const std::string& find_this, | 467 const std::string& find_this, |
462 const std::string& replace_with); | 468 const std::string& replace_with); |
463 | 469 |
464 // This is mpcomplete's pattern for saving a string copy when dealing with | 470 // This is mpcomplete's pattern for saving a string copy when dealing with |
465 // a function that writes results into a wchar_t[] and wanting the result to | 471 // a function that writes results into a wchar_t[] and wanting the result to |
466 // end up in a std::wstring. It ensures that the std::wstring's internal | 472 // end up in a std::wstring. It ensures that the std::wstring's internal |
467 // buffer has enough room to store the characters to be written into it, and | 473 // buffer has enough room to store the characters to be written into it, and |
468 // sets its .length() attribute to the right value. | 474 // sets its .length() attribute to the right value. |
469 // | 475 // |
470 // The reserve() call allocates the memory required to hold the string | 476 // The reserve() call allocates the memory required to hold the string |
471 // plus a terminating null. This is done because resize() isn't | 477 // plus a terminating null. This is done because resize() isn't |
472 // guaranteed to reserve space for the null. The resize() call is | 478 // guaranteed to reserve space for the null. The resize() call is |
473 // simply the only way to change the string's 'length' member. | 479 // simply the only way to change the string's 'length' member. |
474 // | 480 // |
475 // XXX-performance: the call to wide.resize() takes linear time, since it fills | 481 // XXX-performance: the call to wide.resize() takes linear time, since it fills |
476 // the string's buffer with nulls. I call it to change the length of the | 482 // the string's buffer with nulls. I call it to change the length of the |
477 // string (needed because writing directly to the buffer doesn't do this). | 483 // string (needed because writing directly to the buffer doesn't do this). |
478 // Perhaps there's a constant-time way to change the string's length. | 484 // Perhaps there's a constant-time way to change the string's length. |
479 template <class string_type> | 485 template <class string_type> |
480 inline typename string_type::value_type* WriteInto(string_type* str, | 486 inline typename string_type::value_type* WriteInto(string_type* str, |
481 size_t length_with_null) { | 487 size_t length_with_null) { |
482 str->reserve(length_with_null); | 488 str->reserve(length_with_null); |
483 str->resize(length_with_null - 1); | 489 str->resize(length_with_null - 1); |
484 return &((*str)[0]); | 490 return &((*str)[0]); |
485 } | 491 } |
486 | 492 |
487 //----------------------------------------------------------------------------- | 493 //----------------------------------------------------------------------------- |
488 | 494 |
489 // Splits a string into its fields delimited by any of the characters in | 495 // Splits a string into its fields delimited by any of the characters in |
490 // |delimiters|. Each field is added to the |tokens| vector. Returns the | 496 // |delimiters|. Each field is added to the |tokens| vector. Returns the |
491 // number of tokens found. | 497 // number of tokens found. |
492 size_t Tokenize(const std::wstring& str, | 498 BASE_API size_t Tokenize(const std::wstring& str, |
493 const std::wstring& delimiters, | 499 const std::wstring& delimiters, |
494 std::vector<std::wstring>* tokens); | 500 std::vector<std::wstring>* tokens); |
495 size_t Tokenize(const string16& str, | 501 BASE_API size_t Tokenize(const string16& str, |
496 const string16& delimiters, | 502 const string16& delimiters, |
497 std::vector<string16>* tokens); | 503 std::vector<string16>* tokens); |
498 size_t Tokenize(const std::string& str, | 504 BASE_API size_t Tokenize(const std::string& str, |
499 const std::string& delimiters, | 505 const std::string& delimiters, |
500 std::vector<std::string>* tokens); | 506 std::vector<std::string>* tokens); |
501 size_t Tokenize(const base::StringPiece& str, | 507 BASE_API size_t Tokenize(const base::StringPiece& str, |
502 const base::StringPiece& delimiters, | 508 const base::StringPiece& delimiters, |
503 std::vector<base::StringPiece>* tokens); | 509 std::vector<base::StringPiece>* tokens); |
504 | 510 |
505 // Does the opposite of SplitString(). | 511 // Does the opposite of SplitString(). |
506 string16 JoinString(const std::vector<string16>& parts, char16 s); | 512 BASE_API string16 JoinString(const std::vector<string16>& parts, char16 s); |
507 std::string JoinString(const std::vector<std::string>& parts, char s); | 513 BASE_API std::string JoinString(const std::vector<std::string>& parts, char s); |
508 | 514 |
509 // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. | 515 // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. |
510 // Additionally, any number of consecutive '$' characters is replaced by that | 516 // Additionally, any number of consecutive '$' characters is replaced by that |
511 // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be | 517 // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be |
512 // NULL. This only allows you to use up to nine replacements. | 518 // NULL. This only allows you to use up to nine replacements. |
513 string16 ReplaceStringPlaceholders(const string16& format_string, | 519 BASE_API string16 ReplaceStringPlaceholders(const string16& format_string, |
514 const std::vector<string16>& subst, | 520 const std::vector<string16>& subst, |
515 std::vector<size_t>* offsets); | 521 std::vector<size_t>* offsets); |
516 | 522 |
517 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, | 523 BASE_API std::string ReplaceStringPlaceholders( |
518 const std::vector<std::string>& subst, | 524 const base::StringPiece& format_string, |
519 std::vector<size_t>* offsets); | 525 const std::vector<std::string>& subst, |
| 526 std::vector<size_t>* offsets); |
520 | 527 |
521 // Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL. | 528 // Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL. |
522 string16 ReplaceStringPlaceholders(const string16& format_string, | 529 BASE_API string16 ReplaceStringPlaceholders(const string16& format_string, |
523 const string16& a, | 530 const string16& a, |
524 size_t* offset); | 531 size_t* offset); |
525 | 532 |
526 // Returns true if the string passed in matches the pattern. The pattern | 533 // Returns true if the string passed in matches the pattern. The pattern |
527 // string can contain wildcards like * and ? | 534 // string can contain wildcards like * and ? |
528 // The backslash character (\) is an escape character for * and ? | 535 // The backslash character (\) is an escape character for * and ? |
529 // We limit the patterns to having a max of 16 * or ? characters. | 536 // We limit the patterns to having a max of 16 * or ? characters. |
530 // ? matches 0 or 1 character, while * matches 0 or more characters. | 537 // ? matches 0 or 1 character, while * matches 0 or more characters. |
531 bool MatchPattern(const base::StringPiece& string, | 538 BASE_API bool MatchPattern(const base::StringPiece& string, |
532 const base::StringPiece& pattern); | 539 const base::StringPiece& pattern); |
533 bool MatchPattern(const string16& string, const string16& pattern); | 540 BASE_API bool MatchPattern(const string16& string, const string16& pattern); |
534 | 541 |
535 // Hack to convert any char-like type to its unsigned counterpart. | 542 // Hack to convert any char-like type to its unsigned counterpart. |
536 // For example, it will convert char, signed char and unsigned char to unsigned | 543 // For example, it will convert char, signed char and unsigned char to unsigned |
537 // char. | 544 // char. |
538 template<typename T> | 545 template<typename T> |
539 struct ToUnsigned { | 546 struct ToUnsigned { |
540 typedef T Unsigned; | 547 typedef T Unsigned; |
541 }; | 548 }; |
542 | 549 |
543 template<> | 550 template<> |
(...skipping 11 matching lines...) Expand all Loading... |
555 #elif defined(WCHAR_T_IS_UTF32) | 562 #elif defined(WCHAR_T_IS_UTF32) |
556 typedef uint32 Unsigned; | 563 typedef uint32 Unsigned; |
557 #endif | 564 #endif |
558 }; | 565 }; |
559 template<> | 566 template<> |
560 struct ToUnsigned<short> { | 567 struct ToUnsigned<short> { |
561 typedef unsigned short Unsigned; | 568 typedef unsigned short Unsigned; |
562 }; | 569 }; |
563 | 570 |
564 #endif // BASE_STRING_UTIL_H_ | 571 #endif // BASE_STRING_UTIL_H_ |
OLD | NEW |