OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // This file defines utility functions for working with strings. | 5 // This file defines utility functions for working with strings. |
6 | 6 |
7 #ifndef BASE_STRING_UTIL_H_ | 7 #ifndef BASE_STRING_UTIL_H_ |
8 #define BASE_STRING_UTIL_H_ | 8 #define BASE_STRING_UTIL_H_ |
9 #pragma once | 9 #pragma once |
10 | 10 |
11 #include <ctype.h> | 11 #include <ctype.h> |
12 #include <stdarg.h> // va_list | 12 #include <stdarg.h> // va_list |
13 | 13 |
14 #include <string> | 14 #include <string> |
15 #include <vector> | 15 #include <vector> |
16 | 16 |
17 #include "base/base_api.h" | 17 #include "base/base_export.h" |
18 #include "base/basictypes.h" | 18 #include "base/basictypes.h" |
19 #include "base/compiler_specific.h" | 19 #include "base/compiler_specific.h" |
20 #include "base/string16.h" | 20 #include "base/string16.h" |
21 #include "base/string_piece.h" // For implicit conversions. | 21 #include "base/string_piece.h" // For implicit conversions. |
22 | 22 |
23 // TODO(brettw) remove this dependency. Previously StringPrintf lived in this | 23 // TODO(brettw) remove this dependency. Previously StringPrintf lived in this |
24 // file. We need to convert the callers over to using stringprintf.h instead | 24 // file. We need to convert the callers over to using stringprintf.h instead |
25 // and then remove this. | 25 // and then remove this. |
26 #include "base/stringprintf.h" | 26 #include "base/stringprintf.h" |
27 | 27 |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
86 va_end(arguments); | 86 va_end(arguments); |
87 return result; | 87 return result; |
88 } | 88 } |
89 | 89 |
90 // BSD-style safe and consistent string copy functions. | 90 // BSD-style safe and consistent string copy functions. |
91 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. | 91 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. |
92 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as | 92 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as |
93 // long as |dst_size| is not 0. Returns the length of |src| in characters. | 93 // long as |dst_size| is not 0. Returns the length of |src| in characters. |
94 // If the return value is >= dst_size, then the output was truncated. | 94 // If the return value is >= dst_size, then the output was truncated. |
95 // NOTE: All sizes are in number of characters, NOT in bytes. | 95 // NOTE: All sizes are in number of characters, NOT in bytes. |
96 BASE_API size_t strlcpy(char* dst, const char* src, size_t dst_size); | 96 BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); |
97 BASE_API size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); | 97 BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); |
98 | 98 |
99 // Scan a wprintf format string to determine whether it's portable across a | 99 // Scan a wprintf format string to determine whether it's portable across a |
100 // variety of systems. This function only checks that the conversion | 100 // variety of systems. This function only checks that the conversion |
101 // specifiers used by the format string are supported and have the same meaning | 101 // specifiers used by the format string are supported and have the same meaning |
102 // on a variety of systems. It doesn't check for other errors that might occur | 102 // on a variety of systems. It doesn't check for other errors that might occur |
103 // within a format string. | 103 // within a format string. |
104 // | 104 // |
105 // Nonportable conversion specifiers for wprintf are: | 105 // Nonportable conversion specifiers for wprintf are: |
106 // - 's' and 'c' without an 'l' length modifier. %s and %c operate on char | 106 // - 's' and 'c' without an 'l' length modifier. %s and %c operate on char |
107 // data on all systems except Windows, which treats them as wchar_t data. | 107 // data on all systems except Windows, which treats them as wchar_t data. |
108 // Use %ls and %lc for wchar_t data instead. | 108 // Use %ls and %lc for wchar_t data instead. |
109 // - 'S' and 'C', which operate on wchar_t data on all systems except Windows, | 109 // - 'S' and 'C', which operate on wchar_t data on all systems except Windows, |
110 // which treats them as char data. Use %ls and %lc for wchar_t data | 110 // which treats them as char data. Use %ls and %lc for wchar_t data |
111 // instead. | 111 // instead. |
112 // - 'F', which is not identified by Windows wprintf documentation. | 112 // - 'F', which is not identified by Windows wprintf documentation. |
113 // - 'D', 'O', and 'U', which are deprecated and not available on all systems. | 113 // - 'D', 'O', and 'U', which are deprecated and not available on all systems. |
114 // Use %ld, %lo, and %lu instead. | 114 // Use %ld, %lo, and %lu instead. |
115 // | 115 // |
116 // Note that there is no portable conversion specifier for char data when | 116 // Note that there is no portable conversion specifier for char data when |
117 // working with wprintf. | 117 // working with wprintf. |
118 // | 118 // |
119 // This function is intended to be called from base::vswprintf. | 119 // This function is intended to be called from base::vswprintf. |
120 BASE_API bool IsWprintfFormatPortable(const wchar_t* format); | 120 BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format); |
121 | 121 |
122 // ASCII-specific tolower. The standard library's tolower is locale sensitive, | 122 // ASCII-specific tolower. The standard library's tolower is locale sensitive, |
123 // so we don't want to use it here. | 123 // so we don't want to use it here. |
124 template <class Char> inline Char ToLowerASCII(Char c) { | 124 template <class Char> inline Char ToLowerASCII(Char c) { |
125 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; | 125 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; |
126 } | 126 } |
127 | 127 |
128 // ASCII-specific toupper. The standard library's toupper is locale sensitive, | 128 // ASCII-specific toupper. The standard library's toupper is locale sensitive, |
129 // so we don't want to use it here. | 129 // so we don't want to use it here. |
130 template <class Char> inline Char ToUpperASCII(Char c) { | 130 template <class Char> inline Char ToUpperASCII(Char c) { |
(...skipping 30 matching lines...) Expand all Loading... |
161 | 161 |
162 // These threadsafe functions return references to globally unique empty | 162 // These threadsafe functions return references to globally unique empty |
163 // strings. | 163 // strings. |
164 // | 164 // |
165 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. | 165 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. |
166 // There is only one case where you should use these: functions which need to | 166 // There is only one case where you should use these: functions which need to |
167 // return a string by reference (e.g. as a class member accessor), and don't | 167 // return a string by reference (e.g. as a class member accessor), and don't |
168 // have an empty string to use (e.g. in an error case). These should not be | 168 // have an empty string to use (e.g. in an error case). These should not be |
169 // used as initializers, function arguments, or return values for functions | 169 // used as initializers, function arguments, or return values for functions |
170 // which return by value or outparam. | 170 // which return by value or outparam. |
171 BASE_API const std::string& EmptyString(); | 171 BASE_EXPORT const std::string& EmptyString(); |
172 BASE_API const std::wstring& EmptyWString(); | 172 BASE_EXPORT const std::wstring& EmptyWString(); |
173 BASE_API const string16& EmptyString16(); | 173 BASE_EXPORT const string16& EmptyString16(); |
174 | 174 |
175 BASE_API extern const wchar_t kWhitespaceWide[]; | 175 BASE_EXPORT extern const wchar_t kWhitespaceWide[]; |
176 BASE_API extern const char16 kWhitespaceUTF16[]; | 176 BASE_EXPORT extern const char16 kWhitespaceUTF16[]; |
177 BASE_API extern const char kWhitespaceASCII[]; | 177 BASE_EXPORT extern const char kWhitespaceASCII[]; |
178 | 178 |
179 BASE_API extern const char kUtf8ByteOrderMark[]; | 179 BASE_EXPORT extern const char kUtf8ByteOrderMark[]; |
180 | 180 |
181 // Removes characters in remove_chars from anywhere in input. Returns true if | 181 // Removes characters in remove_chars from anywhere in input. Returns true if |
182 // any characters were removed. | 182 // any characters were removed. |
183 // NOTE: Safe to use the same variable for both input and output. | 183 // NOTE: Safe to use the same variable for both input and output. |
184 BASE_API bool RemoveChars(const string16& input, | 184 BASE_EXPORT bool RemoveChars(const string16& input, |
185 const char16 remove_chars[], | 185 const char16 remove_chars[], |
186 string16* output); | 186 string16* output); |
187 BASE_API bool RemoveChars(const std::string& input, | 187 BASE_EXPORT bool RemoveChars(const std::string& input, |
188 const char remove_chars[], | 188 const char remove_chars[], |
189 std::string* output); | 189 std::string* output); |
190 | 190 |
191 // Removes characters in trim_chars from the beginning and end of input. | 191 // Removes characters in trim_chars from the beginning and end of input. |
192 // NOTE: Safe to use the same variable for both input and output. | 192 // NOTE: Safe to use the same variable for both input and output. |
193 BASE_API bool TrimString(const std::wstring& input, | 193 BASE_EXPORT bool TrimString(const std::wstring& input, |
194 const wchar_t trim_chars[], | 194 const wchar_t trim_chars[], |
195 std::wstring* output); | 195 std::wstring* output); |
196 BASE_API bool TrimString(const string16& input, | 196 BASE_EXPORT bool TrimString(const string16& input, |
197 const char16 trim_chars[], | 197 const char16 trim_chars[], |
198 string16* output); | 198 string16* output); |
199 BASE_API bool TrimString(const std::string& input, | 199 BASE_EXPORT bool TrimString(const std::string& input, |
200 const char trim_chars[], | 200 const char trim_chars[], |
201 std::string* output); | 201 std::string* output); |
202 | 202 |
203 // Truncates a string to the nearest UTF-8 character that will leave | 203 // Truncates a string to the nearest UTF-8 character that will leave |
204 // the string less than or equal to the specified byte size. | 204 // the string less than or equal to the specified byte size. |
205 BASE_API void TruncateUTF8ToByteSize(const std::string& input, | 205 BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, |
206 const size_t byte_size, | 206 const size_t byte_size, |
207 std::string* output); | 207 std::string* output); |
208 | 208 |
209 // Trims any whitespace from either end of the input string. Returns where | 209 // Trims any whitespace from either end of the input string. Returns where |
210 // whitespace was found. | 210 // whitespace was found. |
211 // The non-wide version has two functions: | 211 // The non-wide version has two functions: |
212 // * TrimWhitespaceASCII() | 212 // * TrimWhitespaceASCII() |
213 // This function is for ASCII strings and only looks for ASCII whitespace; | 213 // This function is for ASCII strings and only looks for ASCII whitespace; |
214 // Please choose the best one according to your usage. | 214 // Please choose the best one according to your usage. |
215 // NOTE: Safe to use the same variable for both input and output. | 215 // NOTE: Safe to use the same variable for both input and output. |
216 enum TrimPositions { | 216 enum TrimPositions { |
217 TRIM_NONE = 0, | 217 TRIM_NONE = 0, |
218 TRIM_LEADING = 1 << 0, | 218 TRIM_LEADING = 1 << 0, |
219 TRIM_TRAILING = 1 << 1, | 219 TRIM_TRAILING = 1 << 1, |
220 TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, | 220 TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, |
221 }; | 221 }; |
222 BASE_API TrimPositions TrimWhitespace(const string16& input, | 222 BASE_EXPORT TrimPositions TrimWhitespace(const string16& input, |
223 TrimPositions positions, | 223 TrimPositions positions, |
224 string16* output); | 224 string16* output); |
225 BASE_API TrimPositions TrimWhitespaceASCII(const std::string& input, | 225 BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, |
226 TrimPositions positions, | 226 TrimPositions positions, |
227 std::string* output); | 227 std::string* output); |
228 | 228 |
229 // Deprecated. This function is only for backward compatibility and calls | 229 // Deprecated. This function is only for backward compatibility and calls |
230 // TrimWhitespaceASCII(). | 230 // TrimWhitespaceASCII(). |
231 BASE_API TrimPositions TrimWhitespace(const std::string& input, | 231 BASE_EXPORT TrimPositions TrimWhitespace(const std::string& input, |
232 TrimPositions positions, | 232 TrimPositions positions, |
233 std::string* output); | 233 std::string* output); |
234 | 234 |
235 // Searches for CR or LF characters. Removes all contiguous whitespace | 235 // Searches for CR or LF characters. Removes all contiguous whitespace |
236 // strings that contain them. This is useful when trying to deal with text | 236 // strings that contain them. This is useful when trying to deal with text |
237 // copied from terminals. | 237 // copied from terminals. |
238 // Returns |text|, with the following three transformations: | 238 // Returns |text|, with the following three transformations: |
239 // (1) Leading and trailing whitespace is trimmed. | 239 // (1) Leading and trailing whitespace is trimmed. |
240 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace | 240 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace |
241 // sequences containing a CR or LF are trimmed. | 241 // sequences containing a CR or LF are trimmed. |
242 // (3) All other whitespace sequences are converted to single spaces. | 242 // (3) All other whitespace sequences are converted to single spaces. |
243 BASE_API std::wstring CollapseWhitespace(const std::wstring& text, | 243 BASE_EXPORT std::wstring CollapseWhitespace( |
244 bool trim_sequences_with_line_breaks); | 244 const std::wstring& text, |
245 BASE_API string16 CollapseWhitespace(const string16& text, | 245 bool trim_sequences_with_line_breaks); |
246 bool trim_sequences_with_line_breaks); | 246 BASE_EXPORT string16 CollapseWhitespace( |
247 BASE_API std::string CollapseWhitespaceASCII( | 247 const string16& text, |
248 const std::string& text, bool trim_sequences_with_line_breaks); | 248 bool trim_sequences_with_line_breaks); |
| 249 BASE_EXPORT std::string CollapseWhitespaceASCII( |
| 250 const std::string& text, |
| 251 bool trim_sequences_with_line_breaks); |
249 | 252 |
250 // Returns true if the passed string is empty or contains only white-space | 253 // Returns true if the passed string is empty or contains only white-space |
251 // characters. | 254 // characters. |
252 BASE_API bool ContainsOnlyWhitespaceASCII(const std::string& str); | 255 BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str); |
253 BASE_API bool ContainsOnlyWhitespace(const string16& str); | 256 BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str); |
254 | 257 |
255 // Returns true if |input| is empty or contains only characters found in | 258 // Returns true if |input| is empty or contains only characters found in |
256 // |characters|. | 259 // |characters|. |
257 BASE_API bool ContainsOnlyChars(const std::wstring& input, | 260 BASE_EXPORT bool ContainsOnlyChars(const std::wstring& input, |
258 const std::wstring& characters); | 261 const std::wstring& characters); |
259 BASE_API bool ContainsOnlyChars(const string16& input, | 262 BASE_EXPORT bool ContainsOnlyChars(const string16& input, |
260 const string16& characters); | 263 const string16& characters); |
261 BASE_API bool ContainsOnlyChars(const std::string& input, | 264 BASE_EXPORT bool ContainsOnlyChars(const std::string& input, |
262 const std::string& characters); | 265 const std::string& characters); |
263 | 266 |
264 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII | 267 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII |
265 // beforehand. | 268 // beforehand. |
266 BASE_API std::string WideToASCII(const std::wstring& wide); | 269 BASE_EXPORT std::string WideToASCII(const std::wstring& wide); |
267 BASE_API std::string UTF16ToASCII(const string16& utf16); | 270 BASE_EXPORT std::string UTF16ToASCII(const string16& utf16); |
268 | 271 |
269 // Converts the given wide string to the corresponding Latin1. This will fail | 272 // Converts the given wide string to the corresponding Latin1. This will fail |
270 // (return false) if any characters are more than 255. | 273 // (return false) if any characters are more than 255. |
271 BASE_API bool WideToLatin1(const std::wstring& wide, std::string* latin1); | 274 BASE_EXPORT bool WideToLatin1(const std::wstring& wide, std::string* latin1); |
272 | 275 |
273 // Returns true if the specified string matches the criteria. How can a wide | 276 // Returns true if the specified string matches the criteria. How can a wide |
274 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the | 277 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the |
275 // first case) or characters that use only 8 bits and whose 8-bit | 278 // first case) or characters that use only 8 bits and whose 8-bit |
276 // representation looks like a UTF-8 string (the second case). | 279 // representation looks like a UTF-8 string (the second case). |
277 // | 280 // |
278 // Note that IsStringUTF8 checks not only if the input is structurally | 281 // Note that IsStringUTF8 checks not only if the input is structurally |
279 // valid but also if it doesn't contain any non-character codepoint | 282 // valid but also if it doesn't contain any non-character codepoint |
280 // (e.g. U+FFFE). It's done on purpose because all the existing callers want | 283 // (e.g. U+FFFE). It's done on purpose because all the existing callers want |
281 // to have the maximum 'discriminating' power from other encodings. If | 284 // to have the maximum 'discriminating' power from other encodings. If |
282 // there's a use case for just checking the structural validity, we have to | 285 // there's a use case for just checking the structural validity, we have to |
283 // add a new function for that. | 286 // add a new function for that. |
284 BASE_API bool IsStringUTF8(const std::string& str); | 287 BASE_EXPORT bool IsStringUTF8(const std::string& str); |
285 BASE_API bool IsStringASCII(const std::wstring& str); | 288 BASE_EXPORT bool IsStringASCII(const std::wstring& str); |
286 BASE_API bool IsStringASCII(const base::StringPiece& str); | 289 BASE_EXPORT bool IsStringASCII(const base::StringPiece& str); |
287 BASE_API bool IsStringASCII(const string16& str); | 290 BASE_EXPORT bool IsStringASCII(const string16& str); |
288 | 291 |
289 // Converts the elements of the given string. This version uses a pointer to | 292 // Converts the elements of the given string. This version uses a pointer to |
290 // clearly differentiate it from the non-pointer variant. | 293 // clearly differentiate it from the non-pointer variant. |
291 template <class str> inline void StringToLowerASCII(str* s) { | 294 template <class str> inline void StringToLowerASCII(str* s) { |
292 for (typename str::iterator i = s->begin(); i != s->end(); ++i) | 295 for (typename str::iterator i = s->begin(); i != s->end(); ++i) |
293 *i = base::ToLowerASCII(*i); | 296 *i = base::ToLowerASCII(*i); |
294 } | 297 } |
295 | 298 |
296 template <class str> inline str StringToLowerASCII(const str& s) { | 299 template <class str> inline str StringToLowerASCII(const str& s) { |
297 // for std::string and std::wstring | 300 // for std::string and std::wstring |
(...skipping 13 matching lines...) Expand all Loading... |
311 // for std::string and std::wstring | 314 // for std::string and std::wstring |
312 str output(s); | 315 str output(s); |
313 StringToUpperASCII(&output); | 316 StringToUpperASCII(&output); |
314 return output; | 317 return output; |
315 } | 318 } |
316 | 319 |
317 // Compare the lower-case form of the given string against the given ASCII | 320 // Compare the lower-case form of the given string against the given ASCII |
318 // string. This is useful for checking if an input string matches some | 321 // string. This is useful for checking if an input string matches some |
319 // token, and it is optimized to avoid intermediate string copies. This API is | 322 // token, and it is optimized to avoid intermediate string copies. This API is |
320 // borrowed from the equivalent APIs in Mozilla. | 323 // borrowed from the equivalent APIs in Mozilla. |
321 BASE_API bool LowerCaseEqualsASCII(const std::string& a, const char* b); | 324 BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b); |
322 BASE_API bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); | 325 BASE_EXPORT bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); |
323 BASE_API bool LowerCaseEqualsASCII(const string16& a, const char* b); | 326 BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b); |
324 | 327 |
325 // Same thing, but with string iterators instead. | 328 // Same thing, but with string iterators instead. |
326 BASE_API bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, | 329 BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, |
327 std::string::const_iterator a_end, | 330 std::string::const_iterator a_end, |
328 const char* b); | 331 const char* b); |
329 BASE_API bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, | 332 BASE_EXPORT bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, |
330 std::wstring::const_iterator a_end, | 333 std::wstring::const_iterator a_end, |
331 const char* b); | 334 const char* b); |
332 BASE_API bool LowerCaseEqualsASCII(string16::const_iterator a_begin, | 335 BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin, |
333 string16::const_iterator a_end, | 336 string16::const_iterator a_end, |
334 const char* b); | 337 const char* b); |
335 BASE_API bool LowerCaseEqualsASCII(const char* a_begin, | 338 BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, |
336 const char* a_end, | 339 const char* a_end, |
337 const char* b); | 340 const char* b); |
338 BASE_API bool LowerCaseEqualsASCII(const wchar_t* a_begin, | 341 BASE_EXPORT bool LowerCaseEqualsASCII(const wchar_t* a_begin, |
339 const wchar_t* a_end, | 342 const wchar_t* a_end, |
340 const char* b); | 343 const char* b); |
341 BASE_API bool LowerCaseEqualsASCII(const char16* a_begin, | 344 BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, |
342 const char16* a_end, | 345 const char16* a_end, |
343 const char* b); | 346 const char* b); |
344 | 347 |
345 // Performs a case-sensitive string compare. The behavior is undefined if both | 348 // Performs a case-sensitive string compare. The behavior is undefined if both |
346 // strings are not ASCII. | 349 // strings are not ASCII. |
347 BASE_API bool EqualsASCII(const string16& a, const base::StringPiece& b); | 350 BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b); |
348 | 351 |
349 // Returns true if str starts with search, or false otherwise. | 352 // Returns true if str starts with search, or false otherwise. |
350 BASE_API bool StartsWithASCII(const std::string& str, | 353 BASE_EXPORT bool StartsWithASCII(const std::string& str, |
351 const std::string& search, | 354 const std::string& search, |
352 bool case_sensitive); | 355 bool case_sensitive); |
353 BASE_API bool StartsWith(const std::wstring& str, | 356 BASE_EXPORT bool StartsWith(const std::wstring& str, |
354 const std::wstring& search, | 357 const std::wstring& search, |
355 bool case_sensitive); | 358 bool case_sensitive); |
356 BASE_API bool StartsWith(const string16& str, | 359 BASE_EXPORT bool StartsWith(const string16& str, |
357 const string16& search, | 360 const string16& search, |
358 bool case_sensitive); | 361 bool case_sensitive); |
359 | 362 |
360 // Returns true if str ends with search, or false otherwise. | 363 // Returns true if str ends with search, or false otherwise. |
361 BASE_API bool EndsWith(const std::string& str, | 364 BASE_EXPORT bool EndsWith(const std::string& str, |
362 const std::string& search, | 365 const std::string& search, |
363 bool case_sensitive); | 366 bool case_sensitive); |
364 BASE_API bool EndsWith(const std::wstring& str, | 367 BASE_EXPORT bool EndsWith(const std::wstring& str, |
365 const std::wstring& search, | 368 const std::wstring& search, |
366 bool case_sensitive); | 369 bool case_sensitive); |
367 BASE_API bool EndsWith(const string16& str, | 370 BASE_EXPORT bool EndsWith(const string16& str, |
368 const string16& search, | 371 const string16& search, |
369 bool case_sensitive); | 372 bool case_sensitive); |
370 | 373 |
371 | 374 |
372 // Determines the type of ASCII character, independent of locale (the C | 375 // Determines the type of ASCII character, independent of locale (the C |
373 // library versions will change based on locale). | 376 // library versions will change based on locale). |
374 template <typename Char> | 377 template <typename Char> |
375 inline bool IsAsciiWhitespace(Char c) { | 378 inline bool IsAsciiWhitespace(Char c) { |
376 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; | 379 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; |
377 } | 380 } |
378 template <typename Char> | 381 template <typename Char> |
379 inline bool IsAsciiAlpha(Char c) { | 382 inline bool IsAsciiAlpha(Char c) { |
(...skipping 25 matching lines...) Expand all Loading... |
405 | 408 |
406 // Returns true if it's a whitespace character. | 409 // Returns true if it's a whitespace character. |
407 inline bool IsWhitespace(wchar_t c) { | 410 inline bool IsWhitespace(wchar_t c) { |
408 return wcschr(kWhitespaceWide, c) != NULL; | 411 return wcschr(kWhitespaceWide, c) != NULL; |
409 } | 412 } |
410 | 413 |
411 // Return a byte string in human-readable format with a unit suffix. Not | 414 // Return a byte string in human-readable format with a unit suffix. Not |
412 // appropriate for use in any UI; use of FormatBytes and friends in ui/base is | 415 // appropriate for use in any UI; use of FormatBytes and friends in ui/base is |
413 // highly recommended instead. TODO(avi): Figure out how to get callers to use | 416 // highly recommended instead. TODO(avi): Figure out how to get callers to use |
414 // FormatBytes instead; remove this. | 417 // FormatBytes instead; remove this. |
415 BASE_API string16 FormatBytesUnlocalized(int64 bytes); | 418 BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes); |
416 | 419 |
417 // Starting at |start_offset| (usually 0), replace the first instance of | 420 // Starting at |start_offset| (usually 0), replace the first instance of |
418 // |find_this| with |replace_with|. | 421 // |find_this| with |replace_with|. |
419 BASE_API void ReplaceFirstSubstringAfterOffset(string16* str, | 422 BASE_EXPORT void ReplaceFirstSubstringAfterOffset( |
420 string16::size_type start_offset, | 423 string16* str, |
421 const string16& find_this, | 424 string16::size_type start_offset, |
422 const string16& replace_with); | 425 const string16& find_this, |
423 BASE_API void ReplaceFirstSubstringAfterOffset( | 426 const string16& replace_with); |
| 427 BASE_EXPORT void ReplaceFirstSubstringAfterOffset( |
424 std::string* str, | 428 std::string* str, |
425 std::string::size_type start_offset, | 429 std::string::size_type start_offset, |
426 const std::string& find_this, | 430 const std::string& find_this, |
427 const std::string& replace_with); | 431 const std::string& replace_with); |
428 | 432 |
429 // Starting at |start_offset| (usually 0), look through |str| and replace all | 433 // Starting at |start_offset| (usually 0), look through |str| and replace all |
430 // instances of |find_this| with |replace_with|. | 434 // instances of |find_this| with |replace_with|. |
431 // | 435 // |
432 // This does entire substrings; use std::replace in <algorithm> for single | 436 // This does entire substrings; use std::replace in <algorithm> for single |
433 // characters, for example: | 437 // characters, for example: |
434 // std::replace(str.begin(), str.end(), 'a', 'b'); | 438 // std::replace(str.begin(), str.end(), 'a', 'b'); |
435 BASE_API void ReplaceSubstringsAfterOffset(string16* str, | 439 BASE_EXPORT void ReplaceSubstringsAfterOffset( |
436 string16::size_type start_offset, | 440 string16* str, |
437 const string16& find_this, | 441 string16::size_type start_offset, |
438 const string16& replace_with); | 442 const string16& find_this, |
439 BASE_API void ReplaceSubstringsAfterOffset(std::string* str, | 443 const string16& replace_with); |
440 std::string::size_type start_offset, | 444 BASE_EXPORT void ReplaceSubstringsAfterOffset( |
441 const std::string& find_this, | 445 std::string* str, |
442 const std::string& replace_with); | 446 std::string::size_type start_offset, |
| 447 const std::string& find_this, |
| 448 const std::string& replace_with); |
443 | 449 |
444 // This is mpcomplete's pattern for saving a string copy when dealing with | 450 // This is mpcomplete's pattern for saving a string copy when dealing with |
445 // a function that writes results into a wchar_t[] and wanting the result to | 451 // a function that writes results into a wchar_t[] and wanting the result to |
446 // end up in a std::wstring. It ensures that the std::wstring's internal | 452 // end up in a std::wstring. It ensures that the std::wstring's internal |
447 // buffer has enough room to store the characters to be written into it, and | 453 // buffer has enough room to store the characters to be written into it, and |
448 // sets its .length() attribute to the right value. | 454 // sets its .length() attribute to the right value. |
449 // | 455 // |
450 // The reserve() call allocates the memory required to hold the string | 456 // The reserve() call allocates the memory required to hold the string |
451 // plus a terminating null. This is done because resize() isn't | 457 // plus a terminating null. This is done because resize() isn't |
452 // guaranteed to reserve space for the null. The resize() call is | 458 // guaranteed to reserve space for the null. The resize() call is |
453 // simply the only way to change the string's 'length' member. | 459 // simply the only way to change the string's 'length' member. |
454 // | 460 // |
455 // XXX-performance: the call to wide.resize() takes linear time, since it fills | 461 // XXX-performance: the call to wide.resize() takes linear time, since it fills |
456 // the string's buffer with nulls. I call it to change the length of the | 462 // the string's buffer with nulls. I call it to change the length of the |
457 // string (needed because writing directly to the buffer doesn't do this). | 463 // string (needed because writing directly to the buffer doesn't do this). |
458 // Perhaps there's a constant-time way to change the string's length. | 464 // Perhaps there's a constant-time way to change the string's length. |
// Resizes |*str| to hold |length_with_null| - 1 characters and returns a
// pointer to its first element so that callers can write directly into the
// string's buffer.
//
// |length_with_null| counts the terminating null. A value of 0 is treated like
// 1 (an empty string); without that guard the unsigned subtraction below would
// wrap around to the largest size_t and resize() would fail.
template <class string_type>
inline typename string_type::value_type* WriteInto(string_type* str,
                                                   size_t length_with_null) {
  // Number of characters excluding the terminating null, clamped at zero.
  const size_t length = length_with_null > 0 ? length_with_null - 1 : 0;
  // reserve() makes room for the characters plus the null; resize() is what
  // actually updates the string's length.
  str->reserve(length + 1);
  str->resize(length);
  return &((*str)[0]);
}
466 | 472 |
467 //----------------------------------------------------------------------------- | 473 //----------------------------------------------------------------------------- |
468 | 474 |
469 // Splits a string into its fields delimited by any of the characters in | 475 // Splits a string into its fields delimited by any of the characters in |
470 // |delimiters|. Each field is added to the |tokens| vector. Returns the | 476 // |delimiters|. Each field is added to the |tokens| vector. Returns the |
471 // number of tokens found. | 477 // number of tokens found. |
// Overloads are declared for std::wstring, string16, std::string and
// base::StringPiece; in each one |tokens| holds elements of the same type as
// |str|.
// NOTE(review): this header does not show whether |tokens| is cleared before
// fields are added or whether empty fields are kept -- confirm against the
// implementation.
472 BASE_API size_t Tokenize(const std::wstring& str, | 478 BASE_EXPORT size_t Tokenize(const std::wstring& str, |
473 const std::wstring& delimiters, | 479 const std::wstring& delimiters, |
474 std::vector<std::wstring>* tokens); | 480 std::vector<std::wstring>* tokens); |
475 BASE_API size_t Tokenize(const string16& str, | 481 BASE_EXPORT size_t Tokenize(const string16& str, |
476 const string16& delimiters, | 482 const string16& delimiters, |
477 std::vector<string16>* tokens); | 483 std::vector<string16>* tokens); |
478 BASE_API size_t Tokenize(const std::string& str, | 484 BASE_EXPORT size_t Tokenize(const std::string& str, |
479 const std::string& delimiters, | 485 const std::string& delimiters, |
480 std::vector<std::string>* tokens); | 486 std::vector<std::string>* tokens); |
481 BASE_API size_t Tokenize(const base::StringPiece& str, | 487 BASE_EXPORT size_t Tokenize(const base::StringPiece& str, |
482 const base::StringPiece& delimiters, | 488 const base::StringPiece& delimiters, |
483 std::vector<base::StringPiece>* tokens); | 489 std::vector<base::StringPiece>* tokens); |
484 | 490 |
485 // Does the opposite of SplitString(). | 491 // Does the opposite of SplitString(). |
// Takes the pieces in |parts| and a single separator character |s|; one
// overload works on string16/char16, the other on std::string/char.
// NOTE(review): presumably |s| is placed between consecutive elements of
// |parts|; the implementation is not visible here -- confirm.
486 BASE_API string16 JoinString(const std::vector<string16>& parts, char16 s); | 492 BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, char16 s); |
487 BASE_API std::string JoinString(const std::vector<std::string>& parts, char s); | 493 BASE_EXPORT std::string JoinString( |
| 494 const std::vector<std::string>& parts, char s); |
488 | 495 |
489 // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. | 496 // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. |
490 // Additionally, any number of consecutive '$' characters is replaced by that | 497 // Additionally, any number of consecutive '$' characters is replaced by that |
491 // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be | 498 // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be |
492 // NULL. This only allows you to use up to nine replacements. | 499 // NULL. This only allows you to use up to nine replacements. |
// NOTE(review): the |a|..|i| names above do not appear in these signatures;
// the replacements are passed in the |subst| vector. Presumably $N maps to
// the N-th element of |subst| -- confirm against the implementation.
493 BASE_API string16 ReplaceStringPlaceholders(const string16& format_string, | 500 BASE_EXPORT string16 ReplaceStringPlaceholders( |
494 const std::vector<string16>& subst, | 501 const string16& format_string, |
495 std::vector<size_t>* offsets); | 502 const std::vector<string16>& subst, |
| 503 std::vector<size_t>* offsets); |
496 | 504 |
497 BASE_API std::string ReplaceStringPlaceholders( | 505 BASE_EXPORT std::string ReplaceStringPlaceholders( |
498 const base::StringPiece& format_string, | 506 const base::StringPiece& format_string, |
499 const std::vector<std::string>& subst, | 507 const std::vector<std::string>& subst, |
500 std::vector<size_t>* offsets); | 508 std::vector<size_t>* offsets); |
501 | 509 |
502 // Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL. | 510 // Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL. |
503 BASE_API string16 ReplaceStringPlaceholders(const string16& format_string, | 511 BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string, |
504 const string16& a, | 512 const string16& a, |
505 size_t* offset); | 513 size_t* offset); |
506 | 514 |
507 // Returns true if the string passed in matches the pattern. The pattern | 515 // Returns true if the string passed in matches the pattern. The pattern |
508 // string can contain wildcards like * and ? | 516 // string can contain wildcards like * and ? |
509 // The backslash character (\) is an escape character for * and ? | 517 // The backslash character (\) is an escape character for * and ? |
510 // We limit the patterns to having a max of 16 * or ? characters. | 518 // We limit the patterns to having a max of 16 * or ? characters. |
511 // ? matches 0 or 1 character, while * matches 0 or more characters. | 519 // ? matches 0 or 1 character, while * matches 0 or more characters. |
// Note that, as documented above, ? may also match nothing, which differs
// from shell-style globbing where ? matches exactly one character.
// Overloads are declared for base::StringPiece and for string16.
512 BASE_API bool MatchPattern(const base::StringPiece& string, | 520 BASE_EXPORT bool MatchPattern(const base::StringPiece& string, |
513 const base::StringPiece& pattern); | 521 const base::StringPiece& pattern); |
514 BASE_API bool MatchPattern(const string16& string, const string16& pattern); | 522 BASE_EXPORT bool MatchPattern(const string16& string, const string16& pattern); |
515 | 523 |
516 // Hack to convert any char-like type to its unsigned counterpart. | 524 // Hack to convert any char-like type to its unsigned counterpart. |
517 // For example, it will convert char, signed char and unsigned char to unsigned | 525 // For example, it will convert char, signed char and unsigned char to unsigned |
518 // char. | 526 // char. |
// Primary template: by default the nested |Unsigned| type is T itself; the
// explicit specializations that follow supply the unsigned counterpart for
// specific types.
519 template<typename T> | 527 template<typename T> |
520 struct ToUnsigned { | 528 struct ToUnsigned { |
521 typedef T Unsigned; | 529 typedef T Unsigned; |
522 }; | 530 }; |
523 | 531 |
524 template<> | 532 template<> |
(...skipping 11 matching lines...) Expand all Loading... |
536 #elif defined(WCHAR_T_IS_UTF32) | 544 #elif defined(WCHAR_T_IS_UTF32) |
537 typedef uint32 Unsigned; | 545 typedef uint32 Unsigned; |
538 #endif | 546 #endif |
539 }; | 547 }; |
// Specialization for short: the unsigned counterpart is unsigned short.
540 template<> | 548 template<> |
541 struct ToUnsigned<short> { | 549 struct ToUnsigned<short> { |
542 typedef unsigned short Unsigned; | 550 typedef unsigned short Unsigned; |
543 }; | 551 }; |
544 | 552 |
545 #endif // BASE_STRING_UTIL_H_ | 553 #endif // BASE_STRING_UTIL_H_ |
OLD | NEW |