base/strings/string_util.h - Issue 1641513004: Update //base to chromium 9659b08ea5a34f889dc4166217f438095ddc10d2

Unified Diff: base/strings/string_util.h

Issue 1641513004: Update //base to chromium 9659b08ea5a34f889dc4166217f438095ddc10d2 (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: base/strings/string_util.h

diff --git a/base/strings/string_util.h b/base/strings/string_util.h

index bea44ae8fddbaa0992d2d76b5d2719950df70e1d..9e50a332259c5b60f9cc1a373ad860c151d9b9ac 100644

--- a/base/strings/string_util.h

+++ b/base/strings/string_util.h

@@ -21,23 +21,10 @@

namespace base {

-// C standard-library functions like "strncasecmp" and "snprintf" that aren't

-// cross-platform are provided as "base::strncasecmp", and their prototypes

-// are listed below. These functions are then implemented as inline calls

-// to the platform-specific equivalents in the platform-specific headers.

-// Compares the two strings s1 and s2 without regard to case using

-// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if

-// s2 > s1 according to a lexicographic comparison.

-int strcasecmp(const char* s1, const char* s2);

-// Compares up to count characters of s1 and s2 without regard to case using

-// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if

-// s2 > s1 according to a lexicographic comparison.

-int strncasecmp(const char* s1, const char* s2, size_t count);

-// Same as strncmp but for char16 strings.

-int strncmp16(const char16* s1, const char16* s2, size_t count);

+// C standard-library functions that aren't cross-platform are provided as

+// "base::...", and their prototypes are listed below. These functions are

+// then implemented as inline calls to the platform-specific equivalents in the

+// platform-specific headers.

// Wrapper for vsnprintf that always null-terminates and always returns the

// number of characters that would be in an untruncated formatted

@@ -59,6 +46,19 @@ inline int snprintf(char* buffer, size_t size, const char* format, ...) {

return result;

}

+// TODO(mark) http://crbug.com/472900 crashpad shouldn't use base while

+// being DEPSed in. This backwards-compat hack is provided until crashpad is

+// updated.

+#if defined(OS_WIN)

+inline int strcasecmp(const char* s1, const char* s2) {

+ return _stricmp(s1, s2);

+#else // Posix

+inline int strcasecmp(const char* string1, const char* string2) {

+ return ::strcasecmp(string1, string2);

+#endif

// BSD-style safe and consistent string copy functions.

// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.

// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as

@@ -103,17 +103,14 @@ template <class Char> inline Char ToUpperASCII(Char c) {

return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c;

}

-// Function objects to aid in comparing/searching strings.

-template<typename Char> struct CaseInsensitiveCompare {

- public:

- bool operator()(Char x, Char y) const {

- // TODO(darin): Do we really want to do locale sensitive comparisons here?

- // See http://crbug.com/24917

- return tolower(x) == tolower(y);

- }

-};

+// Functor for case-insensitive ASCII comparisons for STL algorithms like

+// std::search.

+//

+// Note that a full Unicode version of this functor is not possible to write

+// because case mappings might change the number of characters, depend on

+// context (combining accents), and require handling UTF-16. If you need

+// proper Unicode support, use base::i18n::ToLower/FoldCase and then just

+// use a normal operator== on the result.

template<typename Char> struct CaseInsensitiveCompareASCII {

public:

bool operator()(Char x, Char y) const {

@@ -121,6 +118,22 @@ template<typename Char> struct CaseInsensitiveCompareASCII {

}

};

+// Like strcasecmp for case-insensitive ASCII characters only. Returns:

+// -1 (a < b)

+// 0 (a == b)

+// 1 (a > b)

+// (unlike strcasecmp which can return values greater or less than 1/-1). For

+// full Unicode support, use base::i18n::ToLower or base::i18n::FoldCase

+// and then just call the normal string operators on the result.

+BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b);

+BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b);

+// Equality for ASCII case-insensitive comparisons. For full Unicode support,

+// use base::i18n::ToLower or base::i18n::FoldCase and then compare with either

+// == or !=.

+BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b);

+BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b);

// These threadsafe functions return references to globally unique empty

// strings.

@@ -138,10 +151,12 @@ BASE_EXPORT const std::string& EmptyString();

BASE_EXPORT const string16& EmptyString16();

// Contains the set of characters representing whitespace in the corresponding

-// encoding. Null-terminated.

-BASE_EXPORT extern const wchar_t kWhitespaceWide[];

-BASE_EXPORT extern const char16 kWhitespaceUTF16[];

+// encoding. Null-terminated. The ASCII versions are the whitespaces as defined

+// by HTML5, and don't include control characters.

+BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode.

+BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode.

BASE_EXPORT extern const char kWhitespaceASCII[];

+BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No Unicode.

// Null-terminated string representing the UTF-8 byte order mark.

BASE_EXPORT extern const char kUtf8ByteOrderMark[];

@@ -150,10 +165,10 @@ BASE_EXPORT extern const char kUtf8ByteOrderMark[];

// if any characters were removed. |remove_chars| must be null-terminated.

// NOTE: Safe to use the same variable for both |input| and |output|.

BASE_EXPORT bool RemoveChars(const string16& input,

- const base::StringPiece16& remove_chars,

+ const StringPiece16& remove_chars,

string16* output);

BASE_EXPORT bool RemoveChars(const std::string& input,

- const base::StringPiece& remove_chars,

+ const StringPiece& remove_chars,

std::string* output);

// Replaces characters in |replace_chars| from anywhere in |input| with

@@ -162,49 +177,65 @@ BASE_EXPORT bool RemoveChars(const std::string& input,

// |replace_chars| must be null-terminated.

// NOTE: Safe to use the same variable for both |input| and |output|.

BASE_EXPORT bool ReplaceChars(const string16& input,

- const base::StringPiece16& replace_chars,

+ const StringPiece16& replace_chars,

const string16& replace_with,

string16* output);

BASE_EXPORT bool ReplaceChars(const std::string& input,

- const base::StringPiece& replace_chars,

+ const StringPiece& replace_chars,

const std::string& replace_with,

std::string* output);

+enum TrimPositions {

+ TRIM_NONE = 0,

+ TRIM_LEADING = 1 << 0,

+ TRIM_TRAILING = 1 << 1,

+ TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,

+};

// Removes characters in |trim_chars| from the beginning and end of |input|.

-// |trim_chars| must be null-terminated.

-// NOTE: Safe to use the same variable for both |input| and |output|.

+// The 8-bit version only works on 8-bit characters, not UTF-8.

+//

+// It is safe to use the same variable for both |input| and |output| (this is

+// the normal usage to trim in-place).

BASE_EXPORT bool TrimString(const string16& input,

- const base::StringPiece16& trim_chars,

+ StringPiece16 trim_chars,

string16* output);

BASE_EXPORT bool TrimString(const std::string& input,

- const base::StringPiece& trim_chars,

+ StringPiece trim_chars,

std::string* output);

+// StringPiece versions of the above. The returned pieces refer to the original

+// buffer.

+BASE_EXPORT StringPiece16 TrimString(StringPiece16 input,

+ const StringPiece16& trim_chars,

+ TrimPositions positions);

+BASE_EXPORT StringPiece TrimString(StringPiece input,

+ const StringPiece& trim_chars,

+ TrimPositions positions);

// Truncates a string to the nearest UTF-8 character that will leave

// the string less than or equal to the specified byte size.

BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input,

const size_t byte_size,

std::string* output);

-// Trims any whitespace from either end of the input string. Returns where

-// whitespace was found.

-// The non-wide version has two functions:

-// * TrimWhitespaceASCII()

-// This function is for ASCII strings and only looks for ASCII whitespace;

-// Please choose the best one according to your usage.

+// Trims any whitespace from either end of the input string.

+//

+// The StringPiece versions return a substring referencing the input buffer.

+// The ASCII versions look only for ASCII whitespace.

+//

+// The std::string versions return where whitespace was found.

// NOTE: Safe to use the same variable for both input and output.

-enum TrimPositions {

- TRIM_NONE = 0,

- TRIM_LEADING = 1 << 0,

- TRIM_TRAILING = 1 << 1,

- TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,

-};

BASE_EXPORT TrimPositions TrimWhitespace(const string16& input,

TrimPositions positions,

- base::string16* output);

+ string16* output);

+BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input,

+ TrimPositions positions);

BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,

TrimPositions positions,

std::string* output);

+BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input,

+ TrimPositions positions);

// Deprecated. This function is only for backward compatibility and calls

// TrimWhitespaceASCII().

@@ -315,32 +346,41 @@ BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin,

// strings are not ASCII.

BASE_EXPORT bool EqualsASCII(const string16& a, const StringPiece& b);

-} // namespace base

-#if defined(OS_WIN)

-#include "base/strings/string_util_win.h"

-#elif defined(OS_POSIX)

-#include "base/strings/string_util_posix.h"

-#else

-#error Define string operations appropriately for your platform

-#endif

-// Returns true if str starts with search, or false otherwise.

-BASE_EXPORT bool StartsWithASCII(const std::string& str,

- const std::string& search,

- bool case_sensitive);

-BASE_EXPORT bool StartsWith(const base::string16& str,

- const base::string16& search,

- bool case_sensitive);

-// Returns true if str ends with search, or false otherwise.

-BASE_EXPORT bool EndsWith(const std::string& str,

- const std::string& search,

- bool case_sensitive);

-BASE_EXPORT bool EndsWith(const base::string16& str,

- const base::string16& search,

- bool case_sensitive);

+// Indicates case sensitivity of comparisons. Only ASCII case insensitivity

+// is supported. Full Unicode case-insensitive conversions would need to go in

+// base/i18n so it can use ICU.

+//

+// If you need to do Unicode-aware case-insensitive StartsWith/EndsWith, it's

+// best to call base::i18n::ToLower() or base::i18n::FoldCase() (see

+// base/i18n/case_conversion.h for usage advice) on the arguments, and then use

+// the results in a case-sensitive comparison.

+enum class CompareCase {

+ SENSITIVE,

+ INSENSITIVE_ASCII,

+};

+BASE_EXPORT bool StartsWith(StringPiece str,

+ StringPiece search_for,

+ CompareCase case_sensitivity);

+BASE_EXPORT bool StartsWith(StringPiece16 str,

+ StringPiece16 search_for,

+ CompareCase case_sensitivity);

+BASE_EXPORT bool EndsWith(StringPiece str,

+ StringPiece search_for,

+ CompareCase case_sensitivity);

+BASE_EXPORT bool EndsWith(StringPiece16 str,

+ StringPiece16 search_for,

+ CompareCase case_sensitivity);

+// DEPRECATED. Returns true if str starts/ends with search, or false otherwise.

+// TODO(brettw) remove in favor of the "enum" versions above.

+inline bool StartsWithASCII(const std::string& str,

+ const std::string& search,

+ bool case_sensitive) {

+ return StartsWith(

+ StringPiece(str), StringPiece(search),

+ case_sensitive ? CompareCase::SENSITIVE : CompareCase::INSENSITIVE_ASCII);

// Determines the type of ASCII character, independent of locale (the C

// library versions will change based on locale).

@@ -364,20 +404,15 @@ inline bool IsHexDigit(Char c) {

(c >= 'a' && c <= 'f');

}

-template <typename Char>

-inline char HexDigitToInt(Char c) {

- DCHECK(IsHexDigit(c));

- if (c >= '0' && c <= '9')

- return static_cast<char>(c - '0');

- if (c >= 'A' && c <= 'F')

- return static_cast<char>(c - 'A' + 10);

- if (c >= 'a' && c <= 'f')

- return static_cast<char>(c - 'a' + 10);

- return 0;

+// Returns the integer corresponding to the given hex character. For example:

+// '4' -> 4

+// 'a' -> 10

+// 'B' -> 11

+// Assumes the input is a valid hex character. DCHECKs in debug builds if not.

+BASE_EXPORT char HexDigitToInt(wchar_t c);

-// Returns true if it's a whitespace character.

-inline bool IsWhitespace(wchar_t c) {

+// Returns true if it's a Unicode whitespace character.

+inline bool IsUnicodeWhitespace(wchar_t c) {

return wcschr(base::kWhitespaceWide, c) != NULL;

}

@@ -385,20 +420,18 @@ inline bool IsWhitespace(wchar_t c) {

// appropriate for use in any UI; use of FormatBytes and friends in ui/base is

// highly recommended instead. TODO(avi): Figure out how to get callers to use

// FormatBytes instead; remove this.

-BASE_EXPORT base::string16 FormatBytesUnlocalized(int64 bytes);

+BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes);

// Starting at |start_offset| (usually 0), replace the first instance of

// |find_this| with |replace_with|.

-BASE_EXPORT void ReplaceFirstSubstringAfterOffset(

- base::string16* str,

- size_t start_offset,

- const base::string16& find_this,

- const base::string16& replace_with);

-BASE_EXPORT void ReplaceFirstSubstringAfterOffset(

- std::string* str,

- size_t start_offset,

- const std::string& find_this,

- const std::string& replace_with);

+BASE_EXPORT void ReplaceFirstSubstringAfterOffset(base::string16* str,

+ size_t start_offset,

+ StringPiece16 find_this,

+ StringPiece16 replace_with);

+BASE_EXPORT void ReplaceFirstSubstringAfterOffset(std::string* str,

+ size_t start_offset,

+ StringPiece find_this,

+ StringPiece replace_with);

// Starting at |start_offset| (usually 0), look through |str| and replace all

// instances of |find_this| with |replace_with|.

@@ -406,15 +439,14 @@ BASE_EXPORT void ReplaceFirstSubstringAfterOffset(

// This does entire substrings; use std::replace in <algorithm> for single

// characters, for example:

// std::replace(str.begin(), str.end(), 'a', 'b');

-BASE_EXPORT void ReplaceSubstringsAfterOffset(

- base::string16* str,

- size_t start_offset,

- const base::string16& find_this,

- const base::string16& replace_with);

+BASE_EXPORT void ReplaceSubstringsAfterOffset(string16* str,

+ size_t start_offset,

+ StringPiece16 find_this,

+ StringPiece16 replace_with);

BASE_EXPORT void ReplaceSubstringsAfterOffset(std::string* str,

size_t start_offset,

- const std::string& find_this,

- const std::string& replace_with);

+ StringPiece find_this,

+ StringPiece replace_with);

// Reserves enough memory in |str| to accommodate |length_with_null| characters,

// sets the size of |str| to |length_with_null - 1| characters, and returns a

@@ -436,72 +468,45 @@ BASE_EXPORT void ReplaceSubstringsAfterOffset(std::string* str,

// of the string, and not doing that will mean people who access |str| rather

// than str.c_str() will get back a string of whatever size |str| had on entry

// to this function (probably 0).

-template <class string_type>

-inline typename string_type::value_type* WriteInto(string_type* str,

- size_t length_with_null) {

- DCHECK_GT(length_with_null, 1u);

- str->reserve(length_with_null);

- str->resize(length_with_null - 1);

- return &((*str)[0]);

-//-----------------------------------------------------------------------------

-// Splits a string into its fields delimited by any of the characters in

-// |delimiters|. Each field is added to the |tokens| vector. Returns the

-// number of tokens found.

-BASE_EXPORT size_t Tokenize(const base::string16& str,

- const base::string16& delimiters,

- std::vector<base::string16>* tokens);

-BASE_EXPORT size_t Tokenize(const std::string& str,

- const std::string& delimiters,

- std::vector<std::string>* tokens);

-BASE_EXPORT size_t Tokenize(const base::StringPiece& str,

- const base::StringPiece& delimiters,

- std::vector<base::StringPiece>* tokens);

+BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null);

+BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null);

+#ifndef OS_WIN

+BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null);

+#endif

// Does the opposite of SplitString().

-BASE_EXPORT base::string16 JoinString(const std::vector<base::string16>& parts,

- base::char16 s);

-BASE_EXPORT std::string JoinString(

- const std::vector<std::string>& parts, char s);

-// Join |parts| using |separator|.

-BASE_EXPORT std::string JoinString(

- const std::vector<std::string>& parts,

- const std::string& separator);

-BASE_EXPORT base::string16 JoinString(

- const std::vector<base::string16>& parts,

- const base::string16& separator);

+BASE_EXPORT std::string JoinString(const std::vector<std::string>& parts,

+ StringPiece separator);

+BASE_EXPORT string16 JoinString(const std::vector<string16>& parts,

+ StringPiece16 separator);

// Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively.

// Additionally, any number of consecutive '$' characters is replaced by that

// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be

// NULL. This only allows you to use up to nine replacements.

-BASE_EXPORT base::string16 ReplaceStringPlaceholders(

- const base::string16& format_string,

- const std::vector<base::string16>& subst,

- std::vector<size_t>* offsets);

+BASE_EXPORT string16

+ReplaceStringPlaceholders(const string16& format_string,

+ const std::vector<string16>& subst,

+ std::vector<size_t>* offsets);

BASE_EXPORT std::string ReplaceStringPlaceholders(

- const base::StringPiece& format_string,

+ const StringPiece& format_string,

const std::vector<std::string>& subst,

std::vector<size_t>* offsets);

// Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL.

-BASE_EXPORT base::string16 ReplaceStringPlaceholders(

- const base::string16& format_string,

- const base::string16& a,

- size_t* offset);

-// Returns true if the string passed in matches the pattern. The pattern

-// string can contain wildcards like * and ?

-// The backslash character (\) is an escape character for * and ?

-// We limit the patterns to having a max of 16 * or ? characters.

-// ? matches 0 or 1 character, while * matches 0 or more characters.

-BASE_EXPORT bool MatchPattern(const base::StringPiece& string,

- const base::StringPiece& pattern);

-BASE_EXPORT bool MatchPattern(const base::string16& string,

- const base::string16& pattern);

+BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string,

+ const string16& a,

+ size_t* offset);

+} // namespace base

+#if defined(OS_WIN)

+#include "base/strings/string_util_win.h"

+#elif defined(OS_POSIX)

+#include "base/strings/string_util_posix.h"

+#else

+#error Define string operations appropriately for your platform

+#endif

#endif // BASE_STRINGS_STRING_UTIL_H_

« no previous file with comments | « base/strings/string_split_unittest.cc ('k') | base/strings/string_util.cc » ('j') | no next file with comments »