Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | |
| 5 // This file defines utility functions for working with strings. | |
| 6 | 4 |
| 7 #ifndef BASE_STRING_UTIL_H_ | 5 #ifndef BASE_STRING_UTIL_STATIC_H_ |
|
brettw
2011/04/20 16:03:41
Why is this file necessary? It seems confusing to
rvargas (doing something else)
2011/04/20 21:23:58
It seemed the same use case as the base switches.
| |
| 8 #define BASE_STRING_UTIL_H_ | 6 #define BASE_STRING_UTIL_STATIC_H_ |
| 9 #pragma once | 7 #pragma once |
| 10 | 8 |
| 11 #include <stdarg.h> // va_list | 9 #include "base/basictypes.h" |
| 12 | 10 |
| 13 #include <string> | 11 // Avoid including base/string16.h from this file. |
| 14 #include <vector> | 12 #if defined(WCHAR_T_IS_UTF16) |
| 15 | 13 typedef wchar_t char16; |
| 16 #include "base/base_api.h" | 14 #elif defined(WCHAR_T_IS_UTF32) |
| 17 #include "base/basictypes.h" | 15 typedef uint16 char16; |
| 18 #include "base/compiler_specific.h" | |
| 19 #include "base/string16.h" | |
| 20 #include "base/string_piece.h" // For implicit conversions. | |
| 21 | |
| 22 // TODO(brettw) remove this dependency. Previously StringPrintf lived in this | |
| 23 // file. We need to convert the callers over to using stringprintf.h instead | |
| 24 // and then remove this. | |
| 25 #include "base/stringprintf.h" | |
| 26 | |
| 27 // Safe standard library wrappers for all platforms. | |
| 28 | |
| 29 namespace base { | |
| 30 | |
| 31 // C standard-library functions like "strncasecmp" and "snprintf" that aren't | |
| 32 // cross-platform are provided as "base::strncasecmp", and their prototypes | |
| 33 // are listed below. These functions are then implemented as inline calls | |
| 34 // to the platform-specific equivalents in the platform-specific headers. | |
| 35 | |
| 36 // Compares the two strings s1 and s2 without regard to case using | |
| 37 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if | |
| 38 // s2 > s1 according to a lexicographic comparison. | |
| 39 BASE_API int strcasecmp(const char* s1, const char* s2); | |
| 40 | |
| 41 // Compares up to count characters of s1 and s2 without regard to case using | |
| 42 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if | |
| 43 // s2 > s1 according to a lexicographic comparison. | |
| 44 BASE_API int strncasecmp(const char* s1, const char* s2, size_t count); | |
| 45 | |
| 46 // Same as strncmp but for char16 strings. | |
| 47 BASE_API int strncmp16(const char16* s1, const char16* s2, size_t count); | |
| 48 | |
| 49 // Wrapper for vsnprintf that always null-terminates and always returns the | |
| 50 // number of characters that would be in an untruncated formatted | |
| 51 // string, even when truncation occurs. | |
| 52 BASE_API int vsnprintf(char* buffer, size_t size, const char* format, | |
| 53 va_list arguments) | |
| 54 PRINTF_FORMAT(3, 0); | |
| 55 | |
| 56 // vswprintf always null-terminates, but when truncation occurs, it will either | |
| 57 // return -1 or the number of characters that would be in an untruncated | |
| 58 // formatted string. The actual return value depends on the underlying | |
| 59 // C library's vswprintf implementation. | |
| 60 BASE_API int vswprintf(wchar_t* buffer, size_t size, | |
| 61 const wchar_t* format, va_list arguments) | |
| 62 WPRINTF_FORMAT(3, 0); | |
| 63 | |
| 64 // Some of these implementations need to be inlined. | |
| 65 | |
| 66 // We separate the declaration from the implementation of this inline | |
| 67 // function just so the PRINTF_FORMAT works. | |
| 68 inline int snprintf(char* buffer, size_t size, const char* format, ...) | |
| 69 PRINTF_FORMAT(3, 4); | |
| 70 inline int snprintf(char* buffer, size_t size, const char* format, ...) { | |
| 71 va_list arguments; | |
| 72 va_start(arguments, format); | |
| 73 int result = vsnprintf(buffer, size, format, arguments); | |
| 74 va_end(arguments); | |
| 75 return result; | |
| 76 } | |
| 77 | |
| 78 // We separate the declaration from the implementation of this inline | |
| 79 // function just so the WPRINTF_FORMAT works. | |
| 80 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) | |
| 81 WPRINTF_FORMAT(3, 4); | |
| 82 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) { | |
| 83 va_list arguments; | |
| 84 va_start(arguments, format); | |
| 85 int result = vswprintf(buffer, size, format, arguments); | |
| 86 va_end(arguments); | |
| 87 return result; | |
| 88 } | |
| 89 | |
| 90 // BSD-style safe and consistent string copy functions. | |
| 91 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. | |
| 92 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as | |
| 93 // long as |dst_size| is not 0. Returns the length of |src| in characters. | |
| 94 // If the return value is >= dst_size, then the output was truncated. | |
| 95 // NOTE: All sizes are in number of characters, NOT in bytes. | |
| 96 BASE_API size_t strlcpy(char* dst, const char* src, size_t dst_size); | |
| 97 BASE_API size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); | |
| 98 | |
| 99 // Scan a wprintf format string to determine whether it's portable across a | |
| 100 // variety of systems. This function only checks that the conversion | |
| 101 // specifiers used by the format string are supported and have the same meaning | |
| 102 // on a variety of systems. It doesn't check for other errors that might occur | |
| 103 // within a format string. | |
| 104 // | |
| 105 // Nonportable conversion specifiers for wprintf are: | |
| 106 // - 's' and 'c' without an 'l' length modifier. %s and %c operate on char | |
| 107 // data on all systems except Windows, which treat them as wchar_t data. | |
| 108 // Use %ls and %lc for wchar_t data instead. | |
| 109 // - 'S' and 'C', which operate on wchar_t data on all systems except Windows, | |
| 110 // which treat them as char data. Use %ls and %lc for wchar_t data | |
| 111 // instead. | |
| 112 // - 'F', which is not identified by Windows wprintf documentation. | |
| 113 // - 'D', 'O', and 'U', which are deprecated and not available on all systems. | |
| 114 // Use %ld, %lo, and %lu instead. | |
| 115 // | |
| 116 // Note that there is no portable conversion specifier for char data when | |
| 117 // working with wprintf. | |
| 118 // | |
| 119 // This function is intended to be called from base::vswprintf. | |
| 120 BASE_API bool IsWprintfFormatPortable(const wchar_t* format); | |
| 121 | |
| 122 // ASCII-specific tolower. The standard library's tolower is locale sensitive, | |
| 123 // so we don't want to use it here. | |
| 124 template <class Char> inline Char ToLowerASCII(Char c) { | |
| 125 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; | |
| 126 } | |
| 127 | |
| 128 // ASCII-specific toupper. The standard library's toupper is locale sensitive, | |
| 129 // so we don't want to use it here. | |
| 130 template <class Char> inline Char ToUpperASCII(Char c) { | |
| 131 return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; | |
| 132 } | |
| 133 | |
| 134 // Function objects to aid in comparing/searching strings. | |
| 135 | |
| 136 template<typename Char> struct CaseInsensitiveCompare { | |
| 137 public: | |
| 138 bool operator()(Char x, Char y) const { | |
| 139 // TODO(darin): Do we really want to do locale sensitive comparisons here? | |
| 140 // See http://crbug.com/24917 | |
| 141 return tolower(x) == tolower(y); | |
| 142 } | |
| 143 }; | |
| 144 | |
| 145 template<typename Char> struct CaseInsensitiveCompareASCII { | |
| 146 public: | |
| 147 bool operator()(Char x, Char y) const { | |
| 148 return ToLowerASCII(x) == ToLowerASCII(y); | |
| 149 } | |
| 150 }; | |
| 151 | |
| 152 } // namespace base | |
| 153 | |
| 154 #if defined(OS_WIN) | |
| 155 #include "base/string_util_win.h" | |
| 156 #elif defined(OS_POSIX) | |
| 157 #include "base/string_util_posix.h" | |
| 158 #else | |
| 159 #error Define string operations appropriately for your platform | |
| 160 #endif | 16 #endif |
| 161 | 17 |
| 162 // These threadsafe functions return references to globally unique empty | |
| 163 // strings. | |
| 164 // | |
| 165 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. | |
| 166 // There is only one case where you should use these: functions which need to | |
| 167 // return a string by reference (e.g. as a class member accessor), and don't | |
| 168 // have an empty string to use (e.g. in an error case). These should not be | |
| 169 // used as initializers, function arguments, or return values for functions | |
| 170 // which return by value or outparam. | |
| 171 BASE_API const std::string& EmptyString(); | |
| 172 BASE_API const std::wstring& EmptyWString(); | |
| 173 BASE_API const string16& EmptyString16(); | |
| 174 | |
| 175 extern const wchar_t kWhitespaceWide[]; | 18 extern const wchar_t kWhitespaceWide[]; |
| 176 extern const char16 kWhitespaceUTF16[]; | 19 extern const char16 kWhitespaceUTF16[]; |
| 177 extern const char kWhitespaceASCII[]; | 20 extern const char kWhitespaceASCII[]; |
| 178 | |
| 179 extern const char kUtf8ByteOrderMark[]; | 21 extern const char kUtf8ByteOrderMark[]; |
| 180 | 22 |
| 181 // Removes characters in remove_chars from anywhere in input. Returns true if | 23 #endif // BASE_STRING_UTIL_STATIC_H_ |
| 182 // any characters were removed. | |
| 183 // NOTE: Safe to use the same variable for both input and output. | |
| 184 BASE_API bool RemoveChars(const std::wstring& input, | |
| 185 const wchar_t remove_chars[], | |
| 186 std::wstring* output); | |
| 187 BASE_API bool RemoveChars(const string16& input, | |
| 188 const char16 remove_chars[], | |
| 189 string16* output); | |
| 190 BASE_API bool RemoveChars(const std::string& input, | |
| 191 const char remove_chars[], | |
| 192 std::string* output); | |
| 193 | |
| 194 // Removes characters in trim_chars from the beginning and end of input. | |
| 195 // NOTE: Safe to use the same variable for both input and output. | |
| 196 BASE_API bool TrimString(const std::wstring& input, | |
| 197 const wchar_t trim_chars[], | |
| 198 std::wstring* output); | |
| 199 BASE_API bool TrimString(const string16& input, | |
| 200 const char16 trim_chars[], | |
| 201 string16* output); | |
| 202 BASE_API bool TrimString(const std::string& input, | |
| 203 const char trim_chars[], | |
| 204 std::string* output); | |
| 205 | |
| 206 // Truncates a string to the nearest UTF-8 character that will leave | |
| 207 // the string less than or equal to the specified byte size. | |
| 208 BASE_API void TruncateUTF8ToByteSize(const std::string& input, | |
| 209 const size_t byte_size, | |
| 210 std::string* output); | |
| 211 | |
| 212 // Trims any whitespace from either end of the input string. Returns where | |
| 213 // whitespace was found. | |
| 214 // The non-wide version has two functions: | |
| 215 // * TrimWhitespaceASCII() | |
| 216 // This function is for ASCII strings and only looks for ASCII whitespace; | |
| 217 // Please choose the best one according to your usage. | |
| 218 // NOTE: Safe to use the same variable for both input and output. | |
| 219 enum TrimPositions { | |
| 220 TRIM_NONE = 0, | |
| 221 TRIM_LEADING = 1 << 0, | |
| 222 TRIM_TRAILING = 1 << 1, | |
| 223 TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, | |
| 224 }; | |
| 225 BASE_API TrimPositions TrimWhitespace(const std::wstring& input, | |
| 226 TrimPositions positions, | |
| 227 std::wstring* output); | |
| 228 BASE_API TrimPositions TrimWhitespace(const string16& input, | |
| 229 TrimPositions positions, | |
| 230 string16* output); | |
| 231 BASE_API TrimPositions TrimWhitespaceASCII(const std::string& input, | |
| 232 TrimPositions positions, | |
| 233 std::string* output); | |
| 234 | |
| 235 // Deprecated. This function is only for backward compatibility and calls | |
| 236 // TrimWhitespaceASCII(). | |
| 237 BASE_API TrimPositions TrimWhitespace(const std::string& input, | |
| 238 TrimPositions positions, | |
| 239 std::string* output); | |
| 240 | |
| 241 // Searches for CR or LF characters. Removes all contiguous whitespace | |
| 242 // strings that contain them. This is useful when trying to deal with text | |
| 243 // copied from terminals. | |
| 244 // Returns |text|, with the following three transformations: | |
| 245 // (1) Leading and trailing whitespace is trimmed. | |
| 246 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace | |
| 247 // sequences containing a CR or LF are trimmed. | |
| 248 // (3) All other whitespace sequences are converted to single spaces. | |
| 249 BASE_API std::wstring CollapseWhitespace(const std::wstring& text, | |
| 250 bool trim_sequences_with_line_breaks); | |
| 251 BASE_API string16 CollapseWhitespace(const string16& text, | |
| 252 bool trim_sequences_with_line_breaks); | |
| 253 BASE_API std::string CollapseWhitespaceASCII( | |
| 254 const std::string& text, bool trim_sequences_with_line_breaks); | |
| 255 | |
| 256 // Returns true if the passed string is empty or contains only white-space | |
| 257 // characters. | |
| 258 BASE_API bool ContainsOnlyWhitespaceASCII(const std::string& str); | |
| 259 BASE_API bool ContainsOnlyWhitespace(const string16& str); | |
| 260 | |
| 261 // Returns true if |input| is empty or contains only characters found in | |
| 262 // |characters|. | |
| 263 BASE_API bool ContainsOnlyChars(const std::wstring& input, | |
| 264 const std::wstring& characters); | |
| 265 BASE_API bool ContainsOnlyChars(const string16& input, | |
| 266 const string16& characters); | |
| 267 BASE_API bool ContainsOnlyChars(const std::string& input, | |
| 268 const std::string& characters); | |
| 269 | |
| 270 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII | |
| 271 // beforehand. | |
| 272 BASE_API std::string WideToASCII(const std::wstring& wide); | |
| 273 BASE_API std::string UTF16ToASCII(const string16& utf16); | |
| 274 | |
| 275 // Converts the given wide string to the corresponding Latin1. This will fail | |
| 276 // (return false) if any characters are more than 255. | |
| 277 BASE_API bool WideToLatin1(const std::wstring& wide, std::string* latin1); | |
| 278 | |
| 279 // Returns true if the specified string matches the criteria. How can a wide | |
| 280 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the | |
| 281 // first case) or characters that use only 8-bits and whose 8-bit | |
| 282 // representation looks like a UTF-8 string (the second case). | |
| 283 // | |
| 284 // Note that IsStringUTF8 checks not only if the input is structurally | |
| 285 // valid but also if it doesn't contain any non-character codepoint | |
| 286 // (e.g. U+FFFE). It's done on purpose because all the existing callers want | |
| 287 // to have the maximum 'discriminating' power from other encodings. If | |
| 288 // there's a use case for just checking the structural validity, we have to | |
| 289 // add a new function for that. | |
| 290 BASE_API bool IsStringUTF8(const std::string& str); | |
| 291 BASE_API bool IsStringASCII(const std::wstring& str); | |
| 292 BASE_API bool IsStringASCII(const base::StringPiece& str); | |
| 293 BASE_API bool IsStringASCII(const string16& str); | |
| 294 | |
| 295 // Converts the elements of the given string. This version uses a pointer to | |
| 296 // clearly differentiate it from the non-pointer variant. | |
| 297 template <class str> inline void StringToLowerASCII(str* s) { | |
| 298 for (typename str::iterator i = s->begin(); i != s->end(); ++i) | |
| 299 *i = base::ToLowerASCII(*i); | |
| 300 } | |
| 301 | |
| 302 template <class str> inline str StringToLowerASCII(const str& s) { | |
| 303 // for std::string and std::wstring | |
| 304 str output(s); | |
| 305 StringToLowerASCII(&output); | |
| 306 return output; | |
| 307 } | |
| 308 | |
| 309 // Converts the elements of the given string. This version uses a pointer to | |
| 310 // clearly differentiate it from the non-pointer variant. | |
| 311 template <class str> inline void StringToUpperASCII(str* s) { | |
| 312 for (typename str::iterator i = s->begin(); i != s->end(); ++i) | |
| 313 *i = base::ToUpperASCII(*i); | |
| 314 } | |
| 315 | |
| 316 template <class str> inline str StringToUpperASCII(const str& s) { | |
| 317 // for std::string and std::wstring | |
| 318 str output(s); | |
| 319 StringToUpperASCII(&output); | |
| 320 return output; | |
| 321 } | |
| 322 | |
| 323 // Compare the lower-case form of the given string against the given ASCII | |
| 324 // string. This is useful for doing checking if an input string matches some | |
| 325 // token, and it is optimized to avoid intermediate string copies. This API is | |
| 326 // borrowed from the equivalent APIs in Mozilla. | |
| 327 BASE_API bool LowerCaseEqualsASCII(const std::string& a, const char* b); | |
| 328 BASE_API bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); | |
| 329 BASE_API bool LowerCaseEqualsASCII(const string16& a, const char* b); | |
| 330 | |
| 331 // Same thing, but with string iterators instead. | |
| 332 BASE_API bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, | |
| 333 std::string::const_iterator a_end, | |
| 334 const char* b); | |
| 335 BASE_API bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, | |
| 336 std::wstring::const_iterator a_end, | |
| 337 const char* b); | |
| 338 BASE_API bool LowerCaseEqualsASCII(string16::const_iterator a_begin, | |
| 339 string16::const_iterator a_end, | |
| 340 const char* b); | |
| 341 BASE_API bool LowerCaseEqualsASCII(const char* a_begin, | |
| 342 const char* a_end, | |
| 343 const char* b); | |
| 344 BASE_API bool LowerCaseEqualsASCII(const wchar_t* a_begin, | |
| 345 const wchar_t* a_end, | |
| 346 const char* b); | |
| 347 BASE_API bool LowerCaseEqualsASCII(const char16* a_begin, | |
| 348 const char16* a_end, | |
| 349 const char* b); | |
| 350 | |
| 351 // Performs a case-sensitive string compare. The behavior is undefined if both | |
| 352 // strings are not ASCII. | |
| 353 BASE_API bool EqualsASCII(const string16& a, const base::StringPiece& b); | |
| 354 | |
| 355 // Returns true if str starts with search, or false otherwise. | |
| 356 BASE_API bool StartsWithASCII(const std::string& str, | |
| 357 const std::string& search, | |
| 358 bool case_sensitive); | |
| 359 BASE_API bool StartsWith(const std::wstring& str, | |
| 360 const std::wstring& search, | |
| 361 bool case_sensitive); | |
| 362 BASE_API bool StartsWith(const string16& str, | |
| 363 const string16& search, | |
| 364 bool case_sensitive); | |
| 365 | |
| 366 // Returns true if str ends with search, or false otherwise. | |
| 367 BASE_API bool EndsWith(const std::string& str, | |
| 368 const std::string& search, | |
| 369 bool case_sensitive); | |
| 370 BASE_API bool EndsWith(const std::wstring& str, | |
| 371 const std::wstring& search, | |
| 372 bool case_sensitive); | |
| 373 BASE_API bool EndsWith(const string16& str, | |
| 374 const string16& search, | |
| 375 bool case_sensitive); | |
| 376 | |
| 377 | |
| 378 // Determines the type of ASCII character, independent of locale (the C | |
| 379 // library versions will change based on locale). | |
| 380 template <typename Char> | |
| 381 inline bool IsAsciiWhitespace(Char c) { | |
| 382 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; | |
| 383 } | |
| 384 template <typename Char> | |
| 385 inline bool IsAsciiAlpha(Char c) { | |
| 386 return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); | |
| 387 } | |
| 388 template <typename Char> | |
| 389 inline bool IsAsciiDigit(Char c) { | |
| 390 return c >= '0' && c <= '9'; | |
| 391 } | |
| 392 | |
| 393 template <typename Char> | |
| 394 inline bool IsHexDigit(Char c) { | |
| 395 return (c >= '0' && c <= '9') || | |
| 396 (c >= 'A' && c <= 'F') || | |
| 397 (c >= 'a' && c <= 'f'); | |
| 398 } | |
| 399 | |
| 400 template <typename Char> | |
| 401 inline Char HexDigitToInt(Char c) { | |
| 402 DCHECK(IsHexDigit(c)); | |
| 403 if (c >= '0' && c <= '9') | |
| 404 return c - '0'; | |
| 405 if (c >= 'A' && c <= 'F') | |
| 406 return c - 'A' + 10; | |
| 407 if (c >= 'a' && c <= 'f') | |
| 408 return c - 'a' + 10; | |
| 409 return 0; | |
| 410 } | |
| 411 | |
| 412 // Returns true if it's a whitespace character. | |
| 413 inline bool IsWhitespace(wchar_t c) { | |
| 414 return wcschr(kWhitespaceWide, c) != NULL; | |
| 415 } | |
| 416 | |
| 417 enum DataUnits { | |
| 418 DATA_UNITS_BYTE = 0, | |
| 419 DATA_UNITS_KIBIBYTE, | |
| 420 DATA_UNITS_MEBIBYTE, | |
| 421 DATA_UNITS_GIBIBYTE, | |
| 422 }; | |
| 423 | |
| 424 // Return the unit type that is appropriate for displaying the amount of bytes | |
| 425 // passed in. | |
| 426 BASE_API DataUnits GetByteDisplayUnits(int64 bytes); | |
| 427 | |
| 428 // Return a byte string in human-readable format, displayed in the units | |
| 429 // specified by 'units', with an optional unit suffix. | |
| 430 // Ex: FormatBytes(512, DATA_UNITS_KIBIBYTE, true) => "0.5 KB" | |
| 431 // Ex: FormatBytes(10*1024, DATA_UNITS_MEBIBYTE, false) => "0.1" | |
| 432 BASE_API string16 FormatBytes(int64 bytes, DataUnits units, bool show_units); | |
| 433 | |
| 434 // As above, but with "/s" units. | |
| 435 // Ex: FormatSpeed(512, DATA_UNITS_KIBIBYTE, true) => "0.5 KB/s" | |
| 436 // Ex: FormatSpeed(10*1024, DATA_UNITS_MEBIBYTE, false) => "0.1" | |
| 437 BASE_API string16 FormatSpeed(int64 bytes, DataUnits units, bool show_units); | |
| 438 | |
| 439 // Return a number formatted with separators according to the user's locale. | |
| 440 // Ex: FormatNumber(1234567) => 1,234,567 | |
| 441 BASE_API string16 FormatNumber(int64 number); | |
| 442 | |
| 443 // Starting at |start_offset| (usually 0), replace the first instance of | |
| 444 // |find_this| with |replace_with|. | |
| 445 BASE_API void ReplaceFirstSubstringAfterOffset(string16* str, | |
| 446 string16::size_type start_offset, | |
| 447 const string16& find_this, | |
| 448 const string16& replace_with); | |
| 449 BASE_API void ReplaceFirstSubstringAfterOffset( | |
| 450 std::string* str, | |
| 451 std::string::size_type start_offset, | |
| 452 const std::string& find_this, | |
| 453 const std::string& replace_with); | |
| 454 | |
| 455 // Starting at |start_offset| (usually 0), look through |str| and replace all | |
| 456 // instances of |find_this| with |replace_with|. | |
| 457 // | |
| 458 // This does entire substrings; use std::replace in <algorithm> for single | |
| 459 // characters, for example: | |
| 460 // std::replace(str.begin(), str.end(), 'a', 'b'); | |
| 461 BASE_API void ReplaceSubstringsAfterOffset(string16* str, | |
| 462 string16::size_type start_offset, | |
| 463 const string16& find_this, | |
| 464 const string16& replace_with); | |
| 465 BASE_API void ReplaceSubstringsAfterOffset(std::string* str, | |
| 466 std::string::size_type start_offset, | |
| 467 const std::string& find_this, | |
| 468 const std::string& replace_with); | |
| 469 | |
| 470 // This is mpcomplete's pattern for saving a string copy when dealing with | |
| 471 // a function that writes results into a wchar_t[] and wanting the result to | |
| 472 // end up in a std::wstring. It ensures that the std::wstring's internal | |
| 473 // buffer has enough room to store the characters to be written into it, and | |
| 474 // sets its .length() attribute to the right value. | |
| 475 // | |
| 476 // The reserve() call allocates the memory required to hold the string | |
| 477 // plus a terminating null. This is done because resize() isn't | |
| 478 // guaranteed to reserve space for the null. The resize() call is | |
| 479 // simply the only way to change the string's 'length' member. | |
| 480 // | |
| 481 // XXX-performance: the call to wide.resize() takes linear time, since it fills | |
| 482 // the string's buffer with nulls. I call it to change the length of the | |
| 483 // string (needed because writing directly to the buffer doesn't do this). | |
| 484 // Perhaps there's a constant-time way to change the string's length. | |
| 485 template <class string_type> | |
| 486 inline typename string_type::value_type* WriteInto(string_type* str, | |
| 487 size_t length_with_null) { | |
| 488 str->reserve(length_with_null); | |
| 489 str->resize(length_with_null - 1); | |
| 490 return &((*str)[0]); | |
| 491 } | |
| 492 | |
| 493 //----------------------------------------------------------------------------- | |
| 494 | |
| 495 // Splits a string into its fields delimited by any of the characters in | |
| 496 // |delimiters|. Each field is added to the |tokens| vector. Returns the | |
| 497 // number of tokens found. | |
| 498 BASE_API size_t Tokenize(const std::wstring& str, | |
| 499 const std::wstring& delimiters, | |
| 500 std::vector<std::wstring>* tokens); | |
| 501 BASE_API size_t Tokenize(const string16& str, | |
| 502 const string16& delimiters, | |
| 503 std::vector<string16>* tokens); | |
| 504 BASE_API size_t Tokenize(const std::string& str, | |
| 505 const std::string& delimiters, | |
| 506 std::vector<std::string>* tokens); | |
| 507 BASE_API size_t Tokenize(const base::StringPiece& str, | |
| 508 const base::StringPiece& delimiters, | |
| 509 std::vector<base::StringPiece>* tokens); | |
| 510 | |
| 511 // Does the opposite of SplitString(). | |
| 512 BASE_API string16 JoinString(const std::vector<string16>& parts, char16 s); | |
| 513 BASE_API std::string JoinString(const std::vector<std::string>& parts, char s); | |
| 514 | |
| 515 // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. | |
| 516 // Additionally, any number of consecutive '$' characters is replaced by that | |
| 517 // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be | |
| 518 // NULL. This only allows you to use up to nine replacements. | |
| 519 BASE_API string16 ReplaceStringPlaceholders(const string16& format_string, | |
| 520 const std::vector<string16>& subst, | |
| 521 std::vector<size_t>* offsets); | |
| 522 | |
| 523 BASE_API std::string ReplaceStringPlaceholders( | |
| 524 const base::StringPiece& format_string, | |
| 525 const std::vector<std::string>& subst, | |
| 526 std::vector<size_t>* offsets); | |
| 527 | |
| 528 // Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL. | |
| 529 BASE_API string16 ReplaceStringPlaceholders(const string16& format_string, | |
| 530 const string16& a, | |
| 531 size_t* offset); | |
| 532 | |
| 533 // Returns true if the string passed in matches the pattern. The pattern | |
| 534 // string can contain wildcards like * and ? | |
| 535 // The backslash character (\) is an escape character for * and ? | |
| 536 // We limit the patterns to having a max of 16 * or ? characters. | |
| 537 // ? matches 0 or 1 character, while * matches 0 or more characters. | |
| 538 BASE_API bool MatchPattern(const base::StringPiece& string, | |
| 539 const base::StringPiece& pattern); | |
| 540 BASE_API bool MatchPattern(const string16& string, const string16& pattern); | |
| 541 | |
| 542 // Hack to convert any char-like type to its unsigned counterpart. | |
| 543 // For example, it will convert char, signed char and unsigned char to unsigned | |
| 544 // char. | |
| 545 template<typename T> | |
| 546 struct ToUnsigned { | |
| 547 typedef T Unsigned; | |
| 548 }; | |
| 549 | |
| 550 template<> | |
| 551 struct ToUnsigned<char> { | |
| 552 typedef unsigned char Unsigned; | |
| 553 }; | |
| 554 template<> | |
| 555 struct ToUnsigned<signed char> { | |
| 556 typedef unsigned char Unsigned; | |
| 557 }; | |
| 558 template<> | |
| 559 struct ToUnsigned<wchar_t> { | |
| 560 #if defined(WCHAR_T_IS_UTF16) | |
| 561 typedef unsigned short Unsigned; | |
| 562 #elif defined(WCHAR_T_IS_UTF32) | |
| 563 typedef uint32 Unsigned; | |
| 564 #endif | |
| 565 }; | |
| 566 template<> | |
| 567 struct ToUnsigned<short> { | |
| 568 typedef unsigned short Unsigned; | |
| 569 }; | |
| 570 | |
| 571 #endif // BASE_STRING_UTIL_H_ | |
| OLD | NEW |