Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(41)

Unified Diff: base/strings/string_util.h

Issue 1641513004: Update //base to chromium 9659b08ea5a34f889dc4166217f438095ddc10d2 (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « base/strings/string_split_unittest.cc ('k') | base/strings/string_util.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: base/strings/string_util.h
diff --git a/base/strings/string_util.h b/base/strings/string_util.h
index bea44ae8fddbaa0992d2d76b5d2719950df70e1d..9e50a332259c5b60f9cc1a373ad860c151d9b9ac 100644
--- a/base/strings/string_util.h
+++ b/base/strings/string_util.h
@@ -21,23 +21,10 @@
namespace base {
-// C standard-library functions like "strncasecmp" and "snprintf" that aren't
-// cross-platform are provided as "base::strncasecmp", and their prototypes
-// are listed below. These functions are then implemented as inline calls
-// to the platform-specific equivalents in the platform-specific headers.
-
-// Compares the two strings s1 and s2 without regard to case using
-// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
-// s2 > s1 according to a lexicographic comparison.
-int strcasecmp(const char* s1, const char* s2);
-
-// Compares up to count characters of s1 and s2 without regard to case using
-// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
-// s2 > s1 according to a lexicographic comparison.
-int strncasecmp(const char* s1, const char* s2, size_t count);
-
-// Same as strncmp but for char16 strings.
-int strncmp16(const char16* s1, const char16* s2, size_t count);
+// C standard-library functions that aren't cross-platform are provided as
+// "base::...", and their prototypes are listed below. These functions are
+// then implemented as inline calls to the platform-specific equivalents in the
+// platform-specific headers.
// Wrapper for vsnprintf that always null-terminates and always returns the
// number of characters that would be in an untruncated formatted
@@ -59,6 +46,19 @@ inline int snprintf(char* buffer, size_t size, const char* format, ...) {
return result;
}
+// TODO(mark) http://crbug.com/472900 crashpad shouldn't use base while
+// being DEPSed in. This backwards-compat hack is provided until crashpad is
+// updated.
+#if defined(OS_WIN)
+inline int strcasecmp(const char* s1, const char* s2) {
+ return _stricmp(s1, s2);
+}
+#else // Posix
+inline int strcasecmp(const char* string1, const char* string2) {
+ return ::strcasecmp(string1, string2);
+}
+#endif
+
// BSD-style safe and consistent string copy functions.
// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
@@ -103,17 +103,14 @@ template <class Char> inline Char ToUpperASCII(Char c) {
return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c;
}
-// Function objects to aid in comparing/searching strings.
-
-template<typename Char> struct CaseInsensitiveCompare {
- public:
- bool operator()(Char x, Char y) const {
- // TODO(darin): Do we really want to do locale sensitive comparisons here?
- // See http://crbug.com/24917
- return tolower(x) == tolower(y);
- }
-};
-
+// Functor for case-insensitive ASCII comparisons for STL algorithms like
+// std::search.
+//
+// Note that a full Unicode version of this functor is not possible to write
+// because case mappings might change the number of characters, depend on
+// context (combining accents), and require handling UTF-16. If you need
+// proper Unicode support, use base::i18n::ToLower/FoldCase and then just
+// use a normal operator== on the result.
template<typename Char> struct CaseInsensitiveCompareASCII {
public:
bool operator()(Char x, Char y) const {
@@ -121,6 +118,22 @@ template<typename Char> struct CaseInsensitiveCompareASCII {
}
};
+// Like strcasecmp for case-insensitive ASCII characters only. Returns:
+// -1 (a < b)
+// 0 (a == b)
+// 1 (a > b)
+// (unlike strcasecmp which can return values greater or less than 1/-1). For
+// full Unicode support, use base::i18n::ToLower or base::i18h::FoldCase
+// and then just call the normal string operators on the result.
+BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b);
+BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b);
+
+// Equality for ASCII case-insensitive comparisons. For full Unicode support,
+// use base::i18n::ToLower or base::i18h::FoldCase and then compare with either
+// == or !=.
+BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b);
+BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b);
+
// These threadsafe functions return references to globally unique empty
// strings.
//
@@ -138,10 +151,12 @@ BASE_EXPORT const std::string& EmptyString();
BASE_EXPORT const string16& EmptyString16();
// Contains the set of characters representing whitespace in the corresponding
-// encoding. Null-terminated.
-BASE_EXPORT extern const wchar_t kWhitespaceWide[];
-BASE_EXPORT extern const char16 kWhitespaceUTF16[];
+// encoding. Null-terminated. The ASCII versions are the whitespaces as defined
+// by HTML5, and don't include control characters.
+BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode.
+BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode.
BASE_EXPORT extern const char kWhitespaceASCII[];
+BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No unicode.
// Null-terminated string representing the UTF-8 byte order mark.
BASE_EXPORT extern const char kUtf8ByteOrderMark[];
@@ -150,10 +165,10 @@ BASE_EXPORT extern const char kUtf8ByteOrderMark[];
// if any characters were removed. |remove_chars| must be null-terminated.
// NOTE: Safe to use the same variable for both |input| and |output|.
BASE_EXPORT bool RemoveChars(const string16& input,
- const base::StringPiece16& remove_chars,
+ const StringPiece16& remove_chars,
string16* output);
BASE_EXPORT bool RemoveChars(const std::string& input,
- const base::StringPiece& remove_chars,
+ const StringPiece& remove_chars,
std::string* output);
// Replaces characters in |replace_chars| from anywhere in |input| with
@@ -162,49 +177,65 @@ BASE_EXPORT bool RemoveChars(const std::string& input,
// |replace_chars| must be null-terminated.
// NOTE: Safe to use the same variable for both |input| and |output|.
BASE_EXPORT bool ReplaceChars(const string16& input,
- const base::StringPiece16& replace_chars,
+ const StringPiece16& replace_chars,
const string16& replace_with,
string16* output);
BASE_EXPORT bool ReplaceChars(const std::string& input,
- const base::StringPiece& replace_chars,
+ const StringPiece& replace_chars,
const std::string& replace_with,
std::string* output);
+enum TrimPositions {
+ TRIM_NONE = 0,
+ TRIM_LEADING = 1 << 0,
+ TRIM_TRAILING = 1 << 1,
+ TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,
+};
+
// Removes characters in |trim_chars| from the beginning and end of |input|.
-// |trim_chars| must be null-terminated.
-// NOTE: Safe to use the same variable for both |input| and |output|.
+// The 8-bit version only works on 8-bit characters, not UTF-8.
+//
+// It is safe to use the same variable for both |input| and |output| (this is
+// the normal usage to trim in-place).
BASE_EXPORT bool TrimString(const string16& input,
- const base::StringPiece16& trim_chars,
+ StringPiece16 trim_chars,
string16* output);
BASE_EXPORT bool TrimString(const std::string& input,
- const base::StringPiece& trim_chars,
+ StringPiece trim_chars,
std::string* output);
+// StringPiece versions of the above. The returned pieces refer to the original
+// buffer.
+BASE_EXPORT StringPiece16 TrimString(StringPiece16 input,
+ const StringPiece16& trim_chars,
+ TrimPositions positions);
+BASE_EXPORT StringPiece TrimString(StringPiece input,
+ const StringPiece& trim_chars,
+ TrimPositions positions);
+
// Truncates a string to the nearest UTF-8 character that will leave
// the string less than or equal to the specified byte size.
BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input,
const size_t byte_size,
std::string* output);
-// Trims any whitespace from either end of the input string. Returns where
-// whitespace was found.
-// The non-wide version has two functions:
-// * TrimWhitespaceASCII()
-// This function is for ASCII strings and only looks for ASCII whitespace;
-// Please choose the best one according to your usage.
+// Trims any whitespace from either end of the input string.
+//
+// The StringPiece versions return a substring referencing the input buffer.
+// The ASCII versions look only for ASCII whitespace.
+//
+// The std::string versions return where whitespace was found.
// NOTE: Safe to use the same variable for both input and output.
-enum TrimPositions {
- TRIM_NONE = 0,
- TRIM_LEADING = 1 << 0,
- TRIM_TRAILING = 1 << 1,
- TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,
-};
BASE_EXPORT TrimPositions TrimWhitespace(const string16& input,
TrimPositions positions,
- base::string16* output);
+ string16* output);
+BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input,
+ TrimPositions positions);
BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,
TrimPositions positions,
std::string* output);
+BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input,
+ TrimPositions positions);
// Deprecated. This function is only for backward compatibility and calls
// TrimWhitespaceASCII().
@@ -315,32 +346,41 @@ BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin,
// strings are not ASCII.
BASE_EXPORT bool EqualsASCII(const string16& a, const StringPiece& b);
-} // namespace base
-
-#if defined(OS_WIN)
-#include "base/strings/string_util_win.h"
-#elif defined(OS_POSIX)
-#include "base/strings/string_util_posix.h"
-#else
-#error Define string operations appropriately for your platform
-#endif
-
-// Returns true if str starts with search, or false otherwise.
-BASE_EXPORT bool StartsWithASCII(const std::string& str,
- const std::string& search,
- bool case_sensitive);
-BASE_EXPORT bool StartsWith(const base::string16& str,
- const base::string16& search,
- bool case_sensitive);
-
-// Returns true if str ends with search, or false otherwise.
-BASE_EXPORT bool EndsWith(const std::string& str,
- const std::string& search,
- bool case_sensitive);
-BASE_EXPORT bool EndsWith(const base::string16& str,
- const base::string16& search,
- bool case_sensitive);
+// Indicates case sensitivity of comparisons. Only ASCII case insensitivity
+// is supported. Full Unicode case-insensitive conversions would need to go in
+// base/i18n so it can use ICU.
+//
+// If you need to do Unicode-aware case-insensitive StartsWith/EndsWith, it's
+// best to call base::i18n::ToLower() or base::i18n::FoldCase() (see
+// base/i18n/case_conversion.h for usage advice) on the arguments, and then use
+// the results to a case-sensitive comparison.
+enum class CompareCase {
+ SENSITIVE,
+ INSENSITIVE_ASCII,
+};
+BASE_EXPORT bool StartsWith(StringPiece str,
+ StringPiece search_for,
+ CompareCase case_sensitivity);
+BASE_EXPORT bool StartsWith(StringPiece16 str,
+ StringPiece16 search_for,
+ CompareCase case_sensitivity);
+BASE_EXPORT bool EndsWith(StringPiece str,
+ StringPiece search_for,
+ CompareCase case_sensitivity);
+BASE_EXPORT bool EndsWith(StringPiece16 str,
+ StringPiece16 search_for,
+ CompareCase case_sensitivity);
+
+// DEPRECATED. Returns true if str starts/ends with search, or false otherwise.
+// TODO(brettw) remove in favor of the "enum" versions above.
+inline bool StartsWithASCII(const std::string& str,
+ const std::string& search,
+ bool case_sensitive) {
+ return StartsWith(
+ StringPiece(str), StringPiece(search),
+ case_sensitive ? CompareCase::SENSITIVE : CompareCase::INSENSITIVE_ASCII);
+}
// Determines the type of ASCII character, independent of locale (the C
// library versions will change based on locale).
@@ -364,20 +404,15 @@ inline bool IsHexDigit(Char c) {
(c >= 'a' && c <= 'f');
}
-template <typename Char>
-inline char HexDigitToInt(Char c) {
- DCHECK(IsHexDigit(c));
- if (c >= '0' && c <= '9')
- return static_cast<char>(c - '0');
- if (c >= 'A' && c <= 'F')
- return static_cast<char>(c - 'A' + 10);
- if (c >= 'a' && c <= 'f')
- return static_cast<char>(c - 'a' + 10);
- return 0;
-}
+// Returns the integer corresponding to the given hex character. For example:
+// '4' -> 4
+// 'a' -> 10
+// 'B' -> 11
+// Assumes the input is a valid hex character. DCHECKs in debug builds if not.
+BASE_EXPORT char HexDigitToInt(wchar_t c);
-// Returns true if it's a whitespace character.
-inline bool IsWhitespace(wchar_t c) {
+// Returns true if it's a Unicode whitespace character.
+inline bool IsUnicodeWhitespace(wchar_t c) {
return wcschr(base::kWhitespaceWide, c) != NULL;
}
@@ -385,20 +420,18 @@ inline bool IsWhitespace(wchar_t c) {
// appropriate for use in any UI; use of FormatBytes and friends in ui/base is
// highly recommended instead. TODO(avi): Figure out how to get callers to use
// FormatBytes instead; remove this.
-BASE_EXPORT base::string16 FormatBytesUnlocalized(int64 bytes);
+BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes);
// Starting at |start_offset| (usually 0), replace the first instance of
// |find_this| with |replace_with|.
-BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
- base::string16* str,
- size_t start_offset,
- const base::string16& find_this,
- const base::string16& replace_with);
-BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
- std::string* str,
- size_t start_offset,
- const std::string& find_this,
- const std::string& replace_with);
+BASE_EXPORT void ReplaceFirstSubstringAfterOffset(base::string16* str,
+ size_t start_offset,
+ StringPiece16 find_this,
+ StringPiece16 replace_with);
+BASE_EXPORT void ReplaceFirstSubstringAfterOffset(std::string* str,
+ size_t start_offset,
+ StringPiece find_this,
+ StringPiece replace_with);
// Starting at |start_offset| (usually 0), look through |str| and replace all
// instances of |find_this| with |replace_with|.
@@ -406,15 +439,14 @@ BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
// This does entire substrings; use std::replace in <algorithm> for single
// characters, for example:
// std::replace(str.begin(), str.end(), 'a', 'b');
-BASE_EXPORT void ReplaceSubstringsAfterOffset(
- base::string16* str,
- size_t start_offset,
- const base::string16& find_this,
- const base::string16& replace_with);
+BASE_EXPORT void ReplaceSubstringsAfterOffset(string16* str,
+ size_t start_offset,
+ StringPiece16 find_this,
+ StringPiece16 replace_with);
BASE_EXPORT void ReplaceSubstringsAfterOffset(std::string* str,
size_t start_offset,
- const std::string& find_this,
- const std::string& replace_with);
+ StringPiece find_this,
+ StringPiece replace_with);
// Reserves enough memory in |str| to accommodate |length_with_null| characters,
// sets the size of |str| to |length_with_null - 1| characters, and returns a
@@ -436,72 +468,45 @@ BASE_EXPORT void ReplaceSubstringsAfterOffset(std::string* str,
// of the string, and not doing that will mean people who access |str| rather
// than str.c_str() will get back a string of whatever size |str| had on entry
// to this function (probably 0).
-template <class string_type>
-inline typename string_type::value_type* WriteInto(string_type* str,
- size_t length_with_null) {
- DCHECK_GT(length_with_null, 1u);
- str->reserve(length_with_null);
- str->resize(length_with_null - 1);
- return &((*str)[0]);
-}
-
-//-----------------------------------------------------------------------------
-
-// Splits a string into its fields delimited by any of the characters in
-// |delimiters|. Each field is added to the |tokens| vector. Returns the
-// number of tokens found.
-BASE_EXPORT size_t Tokenize(const base::string16& str,
- const base::string16& delimiters,
- std::vector<base::string16>* tokens);
-BASE_EXPORT size_t Tokenize(const std::string& str,
- const std::string& delimiters,
- std::vector<std::string>* tokens);
-BASE_EXPORT size_t Tokenize(const base::StringPiece& str,
- const base::StringPiece& delimiters,
- std::vector<base::StringPiece>* tokens);
+BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null);
+BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null);
+#ifndef OS_WIN
+BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null);
+#endif
// Does the opposite of SplitString().
-BASE_EXPORT base::string16 JoinString(const std::vector<base::string16>& parts,
- base::char16 s);
-BASE_EXPORT std::string JoinString(
- const std::vector<std::string>& parts, char s);
-
-// Join |parts| using |separator|.
-BASE_EXPORT std::string JoinString(
- const std::vector<std::string>& parts,
- const std::string& separator);
-BASE_EXPORT base::string16 JoinString(
- const std::vector<base::string16>& parts,
- const base::string16& separator);
+BASE_EXPORT std::string JoinString(const std::vector<std::string>& parts,
+ StringPiece separator);
+BASE_EXPORT string16 JoinString(const std::vector<string16>& parts,
+ StringPiece16 separator);
// Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively.
// Additionally, any number of consecutive '$' characters is replaced by that
// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
// NULL. This only allows you to use up to nine replacements.
-BASE_EXPORT base::string16 ReplaceStringPlaceholders(
- const base::string16& format_string,
- const std::vector<base::string16>& subst,
- std::vector<size_t>* offsets);
+BASE_EXPORT string16
+ReplaceStringPlaceholders(const string16& format_string,
+ const std::vector<string16>& subst,
+ std::vector<size_t>* offsets);
BASE_EXPORT std::string ReplaceStringPlaceholders(
- const base::StringPiece& format_string,
+ const StringPiece& format_string,
const std::vector<std::string>& subst,
std::vector<size_t>* offsets);
// Single-string shortcut for ReplaceStringHolders. |offset| may be NULL.
-BASE_EXPORT base::string16 ReplaceStringPlaceholders(
- const base::string16& format_string,
- const base::string16& a,
- size_t* offset);
-
-// Returns true if the string passed in matches the pattern. The pattern
-// string can contain wildcards like * and ?
-// The backslash character (\) is an escape character for * and ?
-// We limit the patterns to having a max of 16 * or ? characters.
-// ? matches 0 or 1 character, while * matches 0 or more characters.
-BASE_EXPORT bool MatchPattern(const base::StringPiece& string,
- const base::StringPiece& pattern);
-BASE_EXPORT bool MatchPattern(const base::string16& string,
- const base::string16& pattern);
+BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string,
+ const string16& a,
+ size_t* offset);
+
+} // namespace base
+
+#if defined(OS_WIN)
+#include "base/strings/string_util_win.h"
+#elif defined(OS_POSIX)
+#include "base/strings/string_util_posix.h"
+#else
+#error Define string operations appropriately for your platform
+#endif
#endif // BASE_STRINGS_STRING_UTIL_H_
« no previous file with comments | « base/strings/string_split_unittest.cc ('k') | base/strings/string_util.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698