| Index: base/strings/string_util.h
|
| diff --git a/base/strings/string_util.h b/base/strings/string_util.h
|
| index bea44ae8fddbaa0992d2d76b5d2719950df70e1d..9e50a332259c5b60f9cc1a373ad860c151d9b9ac 100644
|
| --- a/base/strings/string_util.h
|
| +++ b/base/strings/string_util.h
|
| @@ -21,23 +21,10 @@
|
|
|
| namespace base {
|
|
|
| -// C standard-library functions like "strncasecmp" and "snprintf" that aren't
|
| -// cross-platform are provided as "base::strncasecmp", and their prototypes
|
| -// are listed below. These functions are then implemented as inline calls
|
| -// to the platform-specific equivalents in the platform-specific headers.
|
| -
|
| -// Compares the two strings s1 and s2 without regard to case using
|
| -// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
|
| -// s2 > s1 according to a lexicographic comparison.
|
| -int strcasecmp(const char* s1, const char* s2);
|
| -
|
| -// Compares up to count characters of s1 and s2 without regard to case using
|
| -// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
|
| -// s2 > s1 according to a lexicographic comparison.
|
| -int strncasecmp(const char* s1, const char* s2, size_t count);
|
| -
|
| -// Same as strncmp but for char16 strings.
|
| -int strncmp16(const char16* s1, const char16* s2, size_t count);
|
| +// C standard-library functions that aren't cross-platform are provided as
|
| +// "base::...", and their prototypes are listed below. These functions are
|
| +// then implemented as inline calls to the platform-specific equivalents in the
|
| +// platform-specific headers.
|
|
|
| // Wrapper for vsnprintf that always null-terminates and always returns the
|
| // number of characters that would be in an untruncated formatted
|
| @@ -59,6 +46,19 @@ inline int snprintf(char* buffer, size_t size, const char* format, ...) {
|
| return result;
|
| }
|
|
|
| +// TODO(mark) http://crbug.com/472900 crashpad shouldn't use base while
|
| +// being DEPSed in. This backwards-compat hack is provided until crashpad is
|
| +// updated.
|
| +#if defined(OS_WIN)
|
| +inline int strcasecmp(const char* s1, const char* s2) {
|
| + return _stricmp(s1, s2);
|
| +}
|
| +#else // Posix
|
| +inline int strcasecmp(const char* string1, const char* string2) {
|
| + return ::strcasecmp(string1, string2);
|
| +}
|
| +#endif
|
| +
|
| // BSD-style safe and consistent string copy functions.
|
| // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
|
| // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
|
| @@ -103,17 +103,14 @@ template <class Char> inline Char ToUpperASCII(Char c) {
|
| return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c;
|
| }
|
|
|
| -// Function objects to aid in comparing/searching strings.
|
| -
|
| -template<typename Char> struct CaseInsensitiveCompare {
|
| - public:
|
| - bool operator()(Char x, Char y) const {
|
| - // TODO(darin): Do we really want to do locale sensitive comparisons here?
|
| - // See http://crbug.com/24917
|
| - return tolower(x) == tolower(y);
|
| - }
|
| -};
|
| -
|
| +// Functor for case-insensitive ASCII comparisons for STL algorithms like
|
| +// std::search.
|
| +//
|
| +// Note that a full Unicode version of this functor is not possible to write
|
| +// because case mappings might change the number of characters, depend on
|
| +// context (combining accents), and require handling UTF-16. If you need
|
| +// proper Unicode support, use base::i18n::ToLower/FoldCase and then just
|
| +// use a normal operator== on the result.
|
| template<typename Char> struct CaseInsensitiveCompareASCII {
|
| public:
|
| bool operator()(Char x, Char y) const {
|
| @@ -121,6 +118,22 @@ template<typename Char> struct CaseInsensitiveCompareASCII {
|
| }
|
| };
|
|
|
| +// Like strcasecmp for case-insensitive ASCII characters only. Returns:
|
| +// -1 (a < b)
|
| +// 0 (a == b)
|
| +// 1 (a > b)
|
| +// (unlike strcasecmp which can return values greater or less than 1/-1). For
|
| +// full Unicode support, use base::i18n::ToLower or base::i18n::FoldCase
|
| +// and then just call the normal string operators on the result.
|
| +BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b);
|
| +BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b);
|
| +
|
| +// Equality for ASCII case-insensitive comparisons. For full Unicode support,
|
| +// use base::i18n::ToLower or base::i18n::FoldCase and then compare with either
|
| +// == or !=.
|
| +BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b);
|
| +BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b);
|
| +
|
| // These threadsafe functions return references to globally unique empty
|
| // strings.
|
| //
|
| @@ -138,10 +151,12 @@ BASE_EXPORT const std::string& EmptyString();
|
| BASE_EXPORT const string16& EmptyString16();
|
|
|
| // Contains the set of characters representing whitespace in the corresponding
|
| -// encoding. Null-terminated.
|
| -BASE_EXPORT extern const wchar_t kWhitespaceWide[];
|
| -BASE_EXPORT extern const char16 kWhitespaceUTF16[];
|
| +// encoding. Null-terminated. The ASCII versions are the whitespaces as defined
|
| +// by HTML5, and don't include control characters.
|
| +BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode.
|
| +BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode.
|
| BASE_EXPORT extern const char kWhitespaceASCII[];
|
| +BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No Unicode.
|
|
|
| // Null-terminated string representing the UTF-8 byte order mark.
|
| BASE_EXPORT extern const char kUtf8ByteOrderMark[];
|
| @@ -150,10 +165,10 @@ BASE_EXPORT extern const char kUtf8ByteOrderMark[];
|
| // if any characters were removed. |remove_chars| must be null-terminated.
|
| // NOTE: Safe to use the same variable for both |input| and |output|.
|
| BASE_EXPORT bool RemoveChars(const string16& input,
|
| - const base::StringPiece16& remove_chars,
|
| + const StringPiece16& remove_chars,
|
| string16* output);
|
| BASE_EXPORT bool RemoveChars(const std::string& input,
|
| - const base::StringPiece& remove_chars,
|
| + const StringPiece& remove_chars,
|
| std::string* output);
|
|
|
| // Replaces characters in |replace_chars| from anywhere in |input| with
|
| @@ -162,49 +177,65 @@ BASE_EXPORT bool RemoveChars(const std::string& input,
|
| // |replace_chars| must be null-terminated.
|
| // NOTE: Safe to use the same variable for both |input| and |output|.
|
| BASE_EXPORT bool ReplaceChars(const string16& input,
|
| - const base::StringPiece16& replace_chars,
|
| + const StringPiece16& replace_chars,
|
| const string16& replace_with,
|
| string16* output);
|
| BASE_EXPORT bool ReplaceChars(const std::string& input,
|
| - const base::StringPiece& replace_chars,
|
| + const StringPiece& replace_chars,
|
| const std::string& replace_with,
|
| std::string* output);
|
|
|
| +enum TrimPositions {
|
| + TRIM_NONE = 0,
|
| + TRIM_LEADING = 1 << 0,
|
| + TRIM_TRAILING = 1 << 1,
|
| + TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,
|
| +};
|
| +
|
| // Removes characters in |trim_chars| from the beginning and end of |input|.
|
| -// |trim_chars| must be null-terminated.
|
| -// NOTE: Safe to use the same variable for both |input| and |output|.
|
| +// The 8-bit version only works on 8-bit characters, not UTF-8.
|
| +//
|
| +// It is safe to use the same variable for both |input| and |output| (this is
|
| +// the normal usage to trim in-place).
|
| BASE_EXPORT bool TrimString(const string16& input,
|
| - const base::StringPiece16& trim_chars,
|
| + StringPiece16 trim_chars,
|
| string16* output);
|
| BASE_EXPORT bool TrimString(const std::string& input,
|
| - const base::StringPiece& trim_chars,
|
| + StringPiece trim_chars,
|
| std::string* output);
|
|
|
| +// StringPiece versions of the above. The returned pieces refer to the original
|
| +// buffer.
|
| +BASE_EXPORT StringPiece16 TrimString(StringPiece16 input,
|
| + const StringPiece16& trim_chars,
|
| + TrimPositions positions);
|
| +BASE_EXPORT StringPiece TrimString(StringPiece input,
|
| + const StringPiece& trim_chars,
|
| + TrimPositions positions);
|
| +
|
| // Truncates a string to the nearest UTF-8 character that will leave
|
| // the string less than or equal to the specified byte size.
|
| BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input,
|
| const size_t byte_size,
|
| std::string* output);
|
|
|
| -// Trims any whitespace from either end of the input string. Returns where
|
| -// whitespace was found.
|
| -// The non-wide version has two functions:
|
| -// * TrimWhitespaceASCII()
|
| -// This function is for ASCII strings and only looks for ASCII whitespace;
|
| -// Please choose the best one according to your usage.
|
| +// Trims any whitespace from either end of the input string.
|
| +//
|
| +// The StringPiece versions return a substring referencing the input buffer.
|
| +// The ASCII versions look only for ASCII whitespace.
|
| +//
|
| +// The std::string versions return where whitespace was found.
|
| // NOTE: Safe to use the same variable for both input and output.
|
| -enum TrimPositions {
|
| - TRIM_NONE = 0,
|
| - TRIM_LEADING = 1 << 0,
|
| - TRIM_TRAILING = 1 << 1,
|
| - TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,
|
| -};
|
| BASE_EXPORT TrimPositions TrimWhitespace(const string16& input,
|
| TrimPositions positions,
|
| - base::string16* output);
|
| + string16* output);
|
| +BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input,
|
| + TrimPositions positions);
|
| BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,
|
| TrimPositions positions,
|
| std::string* output);
|
| +BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input,
|
| + TrimPositions positions);
|
|
|
| // Deprecated. This function is only for backward compatibility and calls
|
| // TrimWhitespaceASCII().
|
| @@ -315,32 +346,41 @@ BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin,
|
| // strings are not ASCII.
|
| BASE_EXPORT bool EqualsASCII(const string16& a, const StringPiece& b);
|
|
|
| -} // namespace base
|
| -
|
| -#if defined(OS_WIN)
|
| -#include "base/strings/string_util_win.h"
|
| -#elif defined(OS_POSIX)
|
| -#include "base/strings/string_util_posix.h"
|
| -#else
|
| -#error Define string operations appropriately for your platform
|
| -#endif
|
| -
|
| -// Returns true if str starts with search, or false otherwise.
|
| -BASE_EXPORT bool StartsWithASCII(const std::string& str,
|
| - const std::string& search,
|
| - bool case_sensitive);
|
| -BASE_EXPORT bool StartsWith(const base::string16& str,
|
| - const base::string16& search,
|
| - bool case_sensitive);
|
| -
|
| -// Returns true if str ends with search, or false otherwise.
|
| -BASE_EXPORT bool EndsWith(const std::string& str,
|
| - const std::string& search,
|
| - bool case_sensitive);
|
| -BASE_EXPORT bool EndsWith(const base::string16& str,
|
| - const base::string16& search,
|
| - bool case_sensitive);
|
| +// Indicates case sensitivity of comparisons. Only ASCII case insensitivity
|
| +// is supported. Full Unicode case-insensitive conversions would need to go in
|
| +// base/i18n so it can use ICU.
|
| +//
|
| +// If you need to do Unicode-aware case-insensitive StartsWith/EndsWith, it's
|
| +// best to call base::i18n::ToLower() or base::i18n::FoldCase() (see
|
| +// base/i18n/case_conversion.h for usage advice) on the arguments, and then use
|
| +// the results in a case-sensitive comparison.
|
| +enum class CompareCase {
|
| + SENSITIVE,
|
| + INSENSITIVE_ASCII,
|
| +};
|
|
|
| +BASE_EXPORT bool StartsWith(StringPiece str,
|
| + StringPiece search_for,
|
| + CompareCase case_sensitivity);
|
| +BASE_EXPORT bool StartsWith(StringPiece16 str,
|
| + StringPiece16 search_for,
|
| + CompareCase case_sensitivity);
|
| +BASE_EXPORT bool EndsWith(StringPiece str,
|
| + StringPiece search_for,
|
| + CompareCase case_sensitivity);
|
| +BASE_EXPORT bool EndsWith(StringPiece16 str,
|
| + StringPiece16 search_for,
|
| + CompareCase case_sensitivity);
|
| +
|
| +// DEPRECATED. Returns true if str starts/ends with search, or false otherwise.
|
| +// TODO(brettw) remove in favor of the "enum" versions above.
|
| +inline bool StartsWithASCII(const std::string& str,
|
| + const std::string& search,
|
| + bool case_sensitive) {
|
| + return StartsWith(
|
| + StringPiece(str), StringPiece(search),
|
| + case_sensitive ? CompareCase::SENSITIVE : CompareCase::INSENSITIVE_ASCII);
|
| +}
|
|
|
| // Determines the type of ASCII character, independent of locale (the C
|
| // library versions will change based on locale).
|
| @@ -364,20 +404,15 @@ inline bool IsHexDigit(Char c) {
|
| (c >= 'a' && c <= 'f');
|
| }
|
|
|
| -template <typename Char>
|
| -inline char HexDigitToInt(Char c) {
|
| - DCHECK(IsHexDigit(c));
|
| - if (c >= '0' && c <= '9')
|
| - return static_cast<char>(c - '0');
|
| - if (c >= 'A' && c <= 'F')
|
| - return static_cast<char>(c - 'A' + 10);
|
| - if (c >= 'a' && c <= 'f')
|
| - return static_cast<char>(c - 'a' + 10);
|
| - return 0;
|
| -}
|
| +// Returns the integer corresponding to the given hex character. For example:
|
| +// '4' -> 4
|
| +// 'a' -> 10
|
| +// 'B' -> 11
|
| +// Assumes the input is a valid hex character. DCHECKs in debug builds if not.
|
| +BASE_EXPORT char HexDigitToInt(wchar_t c);
|
|
|
| -// Returns true if it's a whitespace character.
|
| -inline bool IsWhitespace(wchar_t c) {
|
| +// Returns true if it's a Unicode whitespace character.
|
| +inline bool IsUnicodeWhitespace(wchar_t c) {
|
| return wcschr(base::kWhitespaceWide, c) != NULL;
|
| }
|
|
|
| @@ -385,20 +420,18 @@ inline bool IsWhitespace(wchar_t c) {
|
| // appropriate for use in any UI; use of FormatBytes and friends in ui/base is
|
| // highly recommended instead. TODO(avi): Figure out how to get callers to use
|
| // FormatBytes instead; remove this.
|
| -BASE_EXPORT base::string16 FormatBytesUnlocalized(int64 bytes);
|
| +BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes);
|
|
|
| // Starting at |start_offset| (usually 0), replace the first instance of
|
| // |find_this| with |replace_with|.
|
| -BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
|
| - base::string16* str,
|
| - size_t start_offset,
|
| - const base::string16& find_this,
|
| - const base::string16& replace_with);
|
| -BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
|
| - std::string* str,
|
| - size_t start_offset,
|
| - const std::string& find_this,
|
| - const std::string& replace_with);
|
| +BASE_EXPORT void ReplaceFirstSubstringAfterOffset(base::string16* str,
|
| + size_t start_offset,
|
| + StringPiece16 find_this,
|
| + StringPiece16 replace_with);
|
| +BASE_EXPORT void ReplaceFirstSubstringAfterOffset(std::string* str,
|
| + size_t start_offset,
|
| + StringPiece find_this,
|
| + StringPiece replace_with);
|
|
|
| // Starting at |start_offset| (usually 0), look through |str| and replace all
|
| // instances of |find_this| with |replace_with|.
|
| @@ -406,15 +439,14 @@ BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
|
| // This does entire substrings; use std::replace in <algorithm> for single
|
| // characters, for example:
|
| // std::replace(str.begin(), str.end(), 'a', 'b');
|
| -BASE_EXPORT void ReplaceSubstringsAfterOffset(
|
| - base::string16* str,
|
| - size_t start_offset,
|
| - const base::string16& find_this,
|
| - const base::string16& replace_with);
|
| +BASE_EXPORT void ReplaceSubstringsAfterOffset(string16* str,
|
| + size_t start_offset,
|
| + StringPiece16 find_this,
|
| + StringPiece16 replace_with);
|
| BASE_EXPORT void ReplaceSubstringsAfterOffset(std::string* str,
|
| size_t start_offset,
|
| - const std::string& find_this,
|
| - const std::string& replace_with);
|
| + StringPiece find_this,
|
| + StringPiece replace_with);
|
|
|
| // Reserves enough memory in |str| to accommodate |length_with_null| characters,
|
| // sets the size of |str| to |length_with_null - 1| characters, and returns a
|
| @@ -436,72 +468,45 @@ BASE_EXPORT void ReplaceSubstringsAfterOffset(std::string* str,
|
| // of the string, and not doing that will mean people who access |str| rather
|
| // than str.c_str() will get back a string of whatever size |str| had on entry
|
| // to this function (probably 0).
|
| -template <class string_type>
|
| -inline typename string_type::value_type* WriteInto(string_type* str,
|
| - size_t length_with_null) {
|
| - DCHECK_GT(length_with_null, 1u);
|
| - str->reserve(length_with_null);
|
| - str->resize(length_with_null - 1);
|
| - return &((*str)[0]);
|
| -}
|
| -
|
| -//-----------------------------------------------------------------------------
|
| -
|
| -// Splits a string into its fields delimited by any of the characters in
|
| -// |delimiters|. Each field is added to the |tokens| vector. Returns the
|
| -// number of tokens found.
|
| -BASE_EXPORT size_t Tokenize(const base::string16& str,
|
| - const base::string16& delimiters,
|
| - std::vector<base::string16>* tokens);
|
| -BASE_EXPORT size_t Tokenize(const std::string& str,
|
| - const std::string& delimiters,
|
| - std::vector<std::string>* tokens);
|
| -BASE_EXPORT size_t Tokenize(const base::StringPiece& str,
|
| - const base::StringPiece& delimiters,
|
| - std::vector<base::StringPiece>* tokens);
|
| +BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null);
|
| +BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null);
|
| +#ifndef OS_WIN
|
| +BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null);
|
| +#endif
|
|
|
| // Does the opposite of SplitString().
|
| -BASE_EXPORT base::string16 JoinString(const std::vector<base::string16>& parts,
|
| - base::char16 s);
|
| -BASE_EXPORT std::string JoinString(
|
| - const std::vector<std::string>& parts, char s);
|
| -
|
| -// Join |parts| using |separator|.
|
| -BASE_EXPORT std::string JoinString(
|
| - const std::vector<std::string>& parts,
|
| - const std::string& separator);
|
| -BASE_EXPORT base::string16 JoinString(
|
| - const std::vector<base::string16>& parts,
|
| - const base::string16& separator);
|
| +BASE_EXPORT std::string JoinString(const std::vector<std::string>& parts,
|
| + StringPiece separator);
|
| +BASE_EXPORT string16 JoinString(const std::vector<string16>& parts,
|
| + StringPiece16 separator);
|
|
|
| // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively.
|
| // Additionally, any number of consecutive '$' characters is replaced by that
|
| // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
|
| // NULL. This only allows you to use up to nine replacements.
|
| -BASE_EXPORT base::string16 ReplaceStringPlaceholders(
|
| - const base::string16& format_string,
|
| - const std::vector<base::string16>& subst,
|
| - std::vector<size_t>* offsets);
|
| +BASE_EXPORT string16
|
| +ReplaceStringPlaceholders(const string16& format_string,
|
| + const std::vector<string16>& subst,
|
| + std::vector<size_t>* offsets);
|
|
|
| BASE_EXPORT std::string ReplaceStringPlaceholders(
|
| - const base::StringPiece& format_string,
|
| + const StringPiece& format_string,
|
| const std::vector<std::string>& subst,
|
| std::vector<size_t>* offsets);
|
|
|
| // Single-string shortcut for ReplaceStringHolders. |offset| may be NULL.
|
| -BASE_EXPORT base::string16 ReplaceStringPlaceholders(
|
| - const base::string16& format_string,
|
| - const base::string16& a,
|
| - size_t* offset);
|
| -
|
| -// Returns true if the string passed in matches the pattern. The pattern
|
| -// string can contain wildcards like * and ?
|
| -// The backslash character (\) is an escape character for * and ?
|
| -// We limit the patterns to having a max of 16 * or ? characters.
|
| -// ? matches 0 or 1 character, while * matches 0 or more characters.
|
| -BASE_EXPORT bool MatchPattern(const base::StringPiece& string,
|
| - const base::StringPiece& pattern);
|
| -BASE_EXPORT bool MatchPattern(const base::string16& string,
|
| - const base::string16& pattern);
|
| +BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string,
|
| + const string16& a,
|
| + size_t* offset);
|
| +
|
| +} // namespace base
|
| +
|
| +#if defined(OS_WIN)
|
| +#include "base/strings/string_util_win.h"
|
| +#elif defined(OS_POSIX)
|
| +#include "base/strings/string_util_posix.h"
|
| +#else
|
| +#error Define string operations appropriately for your platform
|
| +#endif
|
|
|
| #endif // BASE_STRINGS_STRING_UTIL_H_
|
|
|