Index: base/strings/string_util.h |
diff --git a/base/strings/string_util.h b/base/strings/string_util.h |
index bea44ae8fddbaa0992d2d76b5d2719950df70e1d..9e50a332259c5b60f9cc1a373ad860c151d9b9ac 100644 |
--- a/base/strings/string_util.h |
+++ b/base/strings/string_util.h |
@@ -21,23 +21,10 @@ |
namespace base { |
-// C standard-library functions like "strncasecmp" and "snprintf" that aren't |
-// cross-platform are provided as "base::strncasecmp", and their prototypes |
-// are listed below. These functions are then implemented as inline calls |
-// to the platform-specific equivalents in the platform-specific headers. |
- |
-// Compares the two strings s1 and s2 without regard to case using |
-// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if |
-// s2 > s1 according to a lexicographic comparison. |
-int strcasecmp(const char* s1, const char* s2); |
- |
-// Compares up to count characters of s1 and s2 without regard to case using |
-// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if |
-// s2 > s1 according to a lexicographic comparison. |
-int strncasecmp(const char* s1, const char* s2, size_t count); |
- |
-// Same as strncmp but for char16 strings. |
-int strncmp16(const char16* s1, const char16* s2, size_t count); |
+// C standard-library functions that aren't cross-platform are provided as |
+// "base::...", and their prototypes are listed below. These functions are |
+// then implemented as inline calls to the platform-specific equivalents in the |
+// platform-specific headers. |
// Wrapper for vsnprintf that always null-terminates and always returns the |
// number of characters that would be in an untruncated formatted |
@@ -59,6 +46,19 @@ inline int snprintf(char* buffer, size_t size, const char* format, ...) { |
return result; |
} |
+// TODO(mark) http://crbug.com/472900 crashpad shouldn't use base while |
+// being DEPSed in. This backwards-compat hack is provided until crashpad is |
+// updated. |
+#if defined(OS_WIN) |
+inline int strcasecmp(const char* s1, const char* s2) { |
+ return _stricmp(s1, s2); |
+} |
+#else // Posix |
+inline int strcasecmp(const char* string1, const char* string2) { |
+ return ::strcasecmp(string1, string2); |
+} |
+#endif |
+ |
// BSD-style safe and consistent string copy functions. |
// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. |
// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as |
@@ -103,17 +103,14 @@ template <class Char> inline Char ToUpperASCII(Char c) { |
return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; |
} |
-// Function objects to aid in comparing/searching strings. |
- |
-template<typename Char> struct CaseInsensitiveCompare { |
- public: |
- bool operator()(Char x, Char y) const { |
- // TODO(darin): Do we really want to do locale sensitive comparisons here? |
- // See http://crbug.com/24917 |
- return tolower(x) == tolower(y); |
- } |
-}; |
- |
+// Functor for case-insensitive ASCII comparisons for STL algorithms like |
+// std::search. |
+// |
+// Note that a full Unicode version of this functor is not possible to write |
+// because case mappings might change the number of characters, depend on |
+// context (combining accents), and require handling UTF-16. If you need |
+// proper Unicode support, use base::i18n::ToLower/FoldCase and then just |
+// use a normal operator== on the result. |
template<typename Char> struct CaseInsensitiveCompareASCII { |
public: |
bool operator()(Char x, Char y) const { |
@@ -121,6 +118,22 @@ template<typename Char> struct CaseInsensitiveCompareASCII { |
} |
}; |
+// Like strcasecmp for case-insensitive ASCII characters only. Returns: |
+// -1 (a < b) |
+// 0 (a == b) |
+// 1 (a > b) |
+// (unlike strcasecmp which can return values greater or less than 1/-1). For |
+// full Unicode support, use base::i18n::ToLower or base::i18n::FoldCase |
+// and then just call the normal string operators on the result. |
+BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b); |
+BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b); |
+ |
+// Equality for ASCII case-insensitive comparisons. For full Unicode support, |
+// use base::i18n::ToLower or base::i18n::FoldCase and then compare with either |
+// == or !=. |
+BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b); |
+BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b); |
+ |
// These threadsafe functions return references to globally unique empty |
// strings. |
// |
@@ -138,10 +151,12 @@ BASE_EXPORT const std::string& EmptyString(); |
BASE_EXPORT const string16& EmptyString16(); |
// Contains the set of characters representing whitespace in the corresponding |
-// encoding. Null-terminated. |
-BASE_EXPORT extern const wchar_t kWhitespaceWide[]; |
-BASE_EXPORT extern const char16 kWhitespaceUTF16[]; |
+// encoding. Null-terminated. The ASCII versions are the whitespaces as defined |
+// by HTML5, and don't include control characters. |
+BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode. |
+BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode. |
BASE_EXPORT extern const char kWhitespaceASCII[]; |
+BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No unicode. |
// Null-terminated string representing the UTF-8 byte order mark. |
BASE_EXPORT extern const char kUtf8ByteOrderMark[]; |
@@ -150,10 +165,10 @@ BASE_EXPORT extern const char kUtf8ByteOrderMark[]; |
// if any characters were removed. |remove_chars| must be null-terminated. |
// NOTE: Safe to use the same variable for both |input| and |output|. |
BASE_EXPORT bool RemoveChars(const string16& input, |
- const base::StringPiece16& remove_chars, |
+ const StringPiece16& remove_chars, |
string16* output); |
BASE_EXPORT bool RemoveChars(const std::string& input, |
- const base::StringPiece& remove_chars, |
+ const StringPiece& remove_chars, |
std::string* output); |
// Replaces characters in |replace_chars| from anywhere in |input| with |
@@ -162,49 +177,65 @@ BASE_EXPORT bool RemoveChars(const std::string& input, |
// |replace_chars| must be null-terminated. |
// NOTE: Safe to use the same variable for both |input| and |output|. |
BASE_EXPORT bool ReplaceChars(const string16& input, |
- const base::StringPiece16& replace_chars, |
+ const StringPiece16& replace_chars, |
const string16& replace_with, |
string16* output); |
BASE_EXPORT bool ReplaceChars(const std::string& input, |
- const base::StringPiece& replace_chars, |
+ const StringPiece& replace_chars, |
const std::string& replace_with, |
std::string* output); |
+enum TrimPositions { |
+ TRIM_NONE = 0, |
+ TRIM_LEADING = 1 << 0, |
+ TRIM_TRAILING = 1 << 1, |
+ TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, |
+}; |
+ |
// Removes characters in |trim_chars| from the beginning and end of |input|. |
-// |trim_chars| must be null-terminated. |
-// NOTE: Safe to use the same variable for both |input| and |output|. |
+// The 8-bit version only works on 8-bit characters, not UTF-8. |
+// |
+// It is safe to use the same variable for both |input| and |output| (this is |
+// the normal usage to trim in-place). |
BASE_EXPORT bool TrimString(const string16& input, |
- const base::StringPiece16& trim_chars, |
+ StringPiece16 trim_chars, |
string16* output); |
BASE_EXPORT bool TrimString(const std::string& input, |
- const base::StringPiece& trim_chars, |
+ StringPiece trim_chars, |
std::string* output); |
+// StringPiece versions of the above. The returned pieces refer to the original |
+// buffer. |
+BASE_EXPORT StringPiece16 TrimString(StringPiece16 input, |
+ const StringPiece16& trim_chars, |
+ TrimPositions positions); |
+BASE_EXPORT StringPiece TrimString(StringPiece input, |
+ const StringPiece& trim_chars, |
+ TrimPositions positions); |
+ |
// Truncates a string to the nearest UTF-8 character that will leave |
// the string less than or equal to the specified byte size. |
BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, |
const size_t byte_size, |
std::string* output); |
-// Trims any whitespace from either end of the input string. Returns where |
-// whitespace was found. |
-// The non-wide version has two functions: |
-// * TrimWhitespaceASCII() |
-// This function is for ASCII strings and only looks for ASCII whitespace; |
-// Please choose the best one according to your usage. |
+// Trims any whitespace from either end of the input string. |
+// |
+// The StringPiece versions return a substring referencing the input buffer. |
+// The ASCII versions look only for ASCII whitespace. |
+// |
+// The std::string versions return where whitespace was found. |
// NOTE: Safe to use the same variable for both input and output. |
-enum TrimPositions { |
- TRIM_NONE = 0, |
- TRIM_LEADING = 1 << 0, |
- TRIM_TRAILING = 1 << 1, |
- TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, |
-}; |
BASE_EXPORT TrimPositions TrimWhitespace(const string16& input, |
TrimPositions positions, |
- base::string16* output); |
+ string16* output); |
+BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input, |
+ TrimPositions positions); |
BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, |
TrimPositions positions, |
std::string* output); |
+BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input, |
+ TrimPositions positions); |
// Deprecated. This function is only for backward compatibility and calls |
// TrimWhitespaceASCII(). |
@@ -315,32 +346,41 @@ BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, |
// strings are not ASCII. |
BASE_EXPORT bool EqualsASCII(const string16& a, const StringPiece& b); |
-} // namespace base |
- |
-#if defined(OS_WIN) |
-#include "base/strings/string_util_win.h" |
-#elif defined(OS_POSIX) |
-#include "base/strings/string_util_posix.h" |
-#else |
-#error Define string operations appropriately for your platform |
-#endif |
- |
-// Returns true if str starts with search, or false otherwise. |
-BASE_EXPORT bool StartsWithASCII(const std::string& str, |
- const std::string& search, |
- bool case_sensitive); |
-BASE_EXPORT bool StartsWith(const base::string16& str, |
- const base::string16& search, |
- bool case_sensitive); |
- |
-// Returns true if str ends with search, or false otherwise. |
-BASE_EXPORT bool EndsWith(const std::string& str, |
- const std::string& search, |
- bool case_sensitive); |
-BASE_EXPORT bool EndsWith(const base::string16& str, |
- const base::string16& search, |
- bool case_sensitive); |
+// Indicates case sensitivity of comparisons. Only ASCII case insensitivity |
+// is supported. Full Unicode case-insensitive conversions would need to go in |
+// base/i18n so it can use ICU. |
+// |
+// If you need to do Unicode-aware case-insensitive StartsWith/EndsWith, it's |
+// best to call base::i18n::ToLower() or base::i18n::FoldCase() (see |
+// base/i18n/case_conversion.h for usage advice) on the arguments, and then use |
+// the results in a case-sensitive comparison. |
+enum class CompareCase { |
+ SENSITIVE, |
+ INSENSITIVE_ASCII, |
+}; |
+BASE_EXPORT bool StartsWith(StringPiece str, |
+ StringPiece search_for, |
+ CompareCase case_sensitivity); |
+BASE_EXPORT bool StartsWith(StringPiece16 str, |
+ StringPiece16 search_for, |
+ CompareCase case_sensitivity); |
+BASE_EXPORT bool EndsWith(StringPiece str, |
+ StringPiece search_for, |
+ CompareCase case_sensitivity); |
+BASE_EXPORT bool EndsWith(StringPiece16 str, |
+ StringPiece16 search_for, |
+ CompareCase case_sensitivity); |
+ |
+// DEPRECATED. Returns true if str starts/ends with search, or false otherwise. |
+// TODO(brettw) remove in favor of the "enum" versions above. |
+inline bool StartsWithASCII(const std::string& str, |
+ const std::string& search, |
+ bool case_sensitive) { |
+ return StartsWith( |
+ StringPiece(str), StringPiece(search), |
+ case_sensitive ? CompareCase::SENSITIVE : CompareCase::INSENSITIVE_ASCII); |
+} |
// Determines the type of ASCII character, independent of locale (the C |
// library versions will change based on locale). |
@@ -364,20 +404,15 @@ inline bool IsHexDigit(Char c) { |
(c >= 'a' && c <= 'f'); |
} |
-template <typename Char> |
-inline char HexDigitToInt(Char c) { |
- DCHECK(IsHexDigit(c)); |
- if (c >= '0' && c <= '9') |
- return static_cast<char>(c - '0'); |
- if (c >= 'A' && c <= 'F') |
- return static_cast<char>(c - 'A' + 10); |
- if (c >= 'a' && c <= 'f') |
- return static_cast<char>(c - 'a' + 10); |
- return 0; |
-} |
+// Returns the integer corresponding to the given hex character. For example: |
+// '4' -> 4 |
+// 'a' -> 10 |
+// 'B' -> 11 |
+// Assumes the input is a valid hex character. DCHECKs in debug builds if not. |
+BASE_EXPORT char HexDigitToInt(wchar_t c); |
-// Returns true if it's a whitespace character. |
-inline bool IsWhitespace(wchar_t c) { |
+// Returns true if it's a Unicode whitespace character. |
+inline bool IsUnicodeWhitespace(wchar_t c) { |
return wcschr(base::kWhitespaceWide, c) != NULL; |
} |
@@ -385,20 +420,18 @@ inline bool IsWhitespace(wchar_t c) { |
// appropriate for use in any UI; use of FormatBytes and friends in ui/base is |
// highly recommended instead. TODO(avi): Figure out how to get callers to use |
// FormatBytes instead; remove this. |
-BASE_EXPORT base::string16 FormatBytesUnlocalized(int64 bytes); |
+BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes); |
// Starting at |start_offset| (usually 0), replace the first instance of |
// |find_this| with |replace_with|. |
-BASE_EXPORT void ReplaceFirstSubstringAfterOffset( |
- base::string16* str, |
- size_t start_offset, |
- const base::string16& find_this, |
- const base::string16& replace_with); |
-BASE_EXPORT void ReplaceFirstSubstringAfterOffset( |
- std::string* str, |
- size_t start_offset, |
- const std::string& find_this, |
- const std::string& replace_with); |
+BASE_EXPORT void ReplaceFirstSubstringAfterOffset(base::string16* str, |
+ size_t start_offset, |
+ StringPiece16 find_this, |
+ StringPiece16 replace_with); |
+BASE_EXPORT void ReplaceFirstSubstringAfterOffset(std::string* str, |
+ size_t start_offset, |
+ StringPiece find_this, |
+ StringPiece replace_with); |
// Starting at |start_offset| (usually 0), look through |str| and replace all |
// instances of |find_this| with |replace_with|. |
@@ -406,15 +439,14 @@ BASE_EXPORT void ReplaceFirstSubstringAfterOffset( |
// This does entire substrings; use std::replace in <algorithm> for single |
// characters, for example: |
// std::replace(str.begin(), str.end(), 'a', 'b'); |
-BASE_EXPORT void ReplaceSubstringsAfterOffset( |
- base::string16* str, |
- size_t start_offset, |
- const base::string16& find_this, |
- const base::string16& replace_with); |
+BASE_EXPORT void ReplaceSubstringsAfterOffset(string16* str, |
+ size_t start_offset, |
+ StringPiece16 find_this, |
+ StringPiece16 replace_with); |
BASE_EXPORT void ReplaceSubstringsAfterOffset(std::string* str, |
size_t start_offset, |
- const std::string& find_this, |
- const std::string& replace_with); |
+ StringPiece find_this, |
+ StringPiece replace_with); |
// Reserves enough memory in |str| to accommodate |length_with_null| characters, |
// sets the size of |str| to |length_with_null - 1| characters, and returns a |
@@ -436,72 +468,45 @@ BASE_EXPORT void ReplaceSubstringsAfterOffset(std::string* str, |
// of the string, and not doing that will mean people who access |str| rather |
// than str.c_str() will get back a string of whatever size |str| had on entry |
// to this function (probably 0). |
-template <class string_type> |
-inline typename string_type::value_type* WriteInto(string_type* str, |
- size_t length_with_null) { |
- DCHECK_GT(length_with_null, 1u); |
- str->reserve(length_with_null); |
- str->resize(length_with_null - 1); |
- return &((*str)[0]); |
-} |
- |
-//----------------------------------------------------------------------------- |
- |
-// Splits a string into its fields delimited by any of the characters in |
-// |delimiters|. Each field is added to the |tokens| vector. Returns the |
-// number of tokens found. |
-BASE_EXPORT size_t Tokenize(const base::string16& str, |
- const base::string16& delimiters, |
- std::vector<base::string16>* tokens); |
-BASE_EXPORT size_t Tokenize(const std::string& str, |
- const std::string& delimiters, |
- std::vector<std::string>* tokens); |
-BASE_EXPORT size_t Tokenize(const base::StringPiece& str, |
- const base::StringPiece& delimiters, |
- std::vector<base::StringPiece>* tokens); |
+BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null); |
+BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null); |
+#ifndef OS_WIN |
+BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null); |
+#endif |
// Does the opposite of SplitString(). |
-BASE_EXPORT base::string16 JoinString(const std::vector<base::string16>& parts, |
- base::char16 s); |
-BASE_EXPORT std::string JoinString( |
- const std::vector<std::string>& parts, char s); |
- |
-// Join |parts| using |separator|. |
-BASE_EXPORT std::string JoinString( |
- const std::vector<std::string>& parts, |
- const std::string& separator); |
-BASE_EXPORT base::string16 JoinString( |
- const std::vector<base::string16>& parts, |
- const base::string16& separator); |
+BASE_EXPORT std::string JoinString(const std::vector<std::string>& parts, |
+ StringPiece separator); |
+BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, |
+ StringPiece16 separator); |
// Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. |
// Additionally, any number of consecutive '$' characters is replaced by that |
// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be |
// NULL. This only allows you to use up to nine replacements. |
-BASE_EXPORT base::string16 ReplaceStringPlaceholders( |
- const base::string16& format_string, |
- const std::vector<base::string16>& subst, |
- std::vector<size_t>* offsets); |
+BASE_EXPORT string16 |
+ReplaceStringPlaceholders(const string16& format_string, |
+ const std::vector<string16>& subst, |
+ std::vector<size_t>* offsets); |
BASE_EXPORT std::string ReplaceStringPlaceholders( |
- const base::StringPiece& format_string, |
+ const StringPiece& format_string, |
const std::vector<std::string>& subst, |
std::vector<size_t>* offsets); |
// Single-string shortcut for ReplaceStringHolders. |offset| may be NULL. |
-BASE_EXPORT base::string16 ReplaceStringPlaceholders( |
- const base::string16& format_string, |
- const base::string16& a, |
- size_t* offset); |
- |
-// Returns true if the string passed in matches the pattern. The pattern |
-// string can contain wildcards like * and ? |
-// The backslash character (\) is an escape character for * and ? |
-// We limit the patterns to having a max of 16 * or ? characters. |
-// ? matches 0 or 1 character, while * matches 0 or more characters. |
-BASE_EXPORT bool MatchPattern(const base::StringPiece& string, |
- const base::StringPiece& pattern); |
-BASE_EXPORT bool MatchPattern(const base::string16& string, |
- const base::string16& pattern); |
+BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string, |
+ const string16& a, |
+ size_t* offset); |
+ |
+} // namespace base |
+ |
+#if defined(OS_WIN) |
+#include "base/strings/string_util_win.h" |
+#elif defined(OS_POSIX) |
+#include "base/strings/string_util_posix.h" |
+#else |
+#error Define string operations appropriately for your platform |
+#endif |
#endif // BASE_STRINGS_STRING_UTIL_H_ |