| Index: base/strings/string_split.h
|
| diff --git a/base/strings/string_split.h b/base/strings/string_split.h
|
| index 55d8cb377edfa597c19173b0e1832b3ce478368f..dc4108d327ff2a4653bd58090bfcdc19995150b4 100644
|
| --- a/base/strings/string_split.h
|
| +++ b/base/strings/string_split.h
|
| @@ -11,28 +11,73 @@
|
|
|
| #include "base/base_export.h"
|
| #include "base/strings/string16.h"
|
| +#include "base/strings/string_piece.h"
|
|
|
| namespace base {
|
|
|
| -// Splits |str| into a vector of strings delimited by |c|, placing the results
|
| -// in |r|. If several instances of |c| are contiguous, or if |str| begins with
|
| -// or ends with |c|, then an empty string is inserted.
|
| +enum WhitespaceHandling {
|
| + KEEP_WHITESPACE,  // Leave whitespace in each result exactly as in the input.
|
| + TRIM_WHITESPACE,  // Strip leading and trailing whitespace from each result.
|
| +};
|
| +
|
| +enum SplitResult {
|
| + // Strictly return all results.
|
| + //
|
| + // If the input is ",," and the separator is ',', this will return a
|
| + // vector of three empty strings.
|
| + SPLIT_WANT_ALL,
|
| +
|
| + // Only nonempty results will be added to the results. Multiple separators
|
| + // will be coalesced. Separators at the beginning and end of the input will
|
| + // be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped.
|
| + //
|
| + // If the input is ",," and the separator is ',', this will return an empty
|
| + // vector.
|
| + SPLIT_WANT_NONEMPTY,
|
| +};
|
| +
|
| +// Split the given string on ANY of the given separators, returning copies of
|
| +// the resulting substrings.
|
| //
|
| -// Every substring is trimmed of any leading or trailing white space.
|
| -// NOTE: |c| must be in BMP (Basic Multilingual Plane)
|
| -BASE_EXPORT void SplitString(const string16& str,
|
| - char16 c,
|
| - std::vector<string16>* r);
|
| +// To split on either commas or semicolons, keeping all whitespace:
|
| +//
|
| +// std::vector<std::string> tokens = base::SplitString(
|
| +// input, ",;", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
|
| +BASE_EXPORT std::vector<std::string> SplitString(
|
| + StringPiece input,
|
| + StringPiece separators,
|
| + WhitespaceHandling whitespace,
|
| + SplitResult result_type);
|
| +BASE_EXPORT std::vector<string16> SplitString(
|
| + StringPiece16 input,
|
| + StringPiece16 separators,
|
| + WhitespaceHandling whitespace,
|
| + SplitResult result_type);
|
|
|
| -// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
|
| -// the trailing byte of a multi-byte character can be in the ASCII range.
|
| -// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
|
| -// Note: |c| must be in the ASCII range.
|
| -BASE_EXPORT void SplitString(const std::string& str,
|
| - char c,
|
| - std::vector<std::string>* r);
|
| +// Like SplitString above except it returns a vector of StringPieces which
|
| +// reference the original buffer without copying. Although you have to keep
|
| +// the original string alive and unmodified while the pieces are in use, this
|
| +// provides an efficient way to iterate through tokens in a string.
|
| +//
|
| +// To iterate through all whitespace-separated tokens in an input string:
|
| +//
|
| +// for (const auto& cur :
|
| +// base::SplitStringPiece(input, base::kWhitespaceASCII,
|
| +// base::KEEP_WHITESPACE,
|
| +// base::SPLIT_WANT_NONEMPTY)) {
|
| +// ...
|
| +BASE_EXPORT std::vector<StringPiece> SplitStringPiece(
|
| + StringPiece input,
|
| + StringPiece separators,
|
| + WhitespaceHandling whitespace,
|
| + SplitResult result_type);
|
| +BASE_EXPORT std::vector<StringPiece16> SplitStringPiece(
|
| + StringPiece16 input,
|
| + StringPiece16 separators,
|
| + WhitespaceHandling whitespace,
|
| + SplitResult result_type);
|
|
|
| -typedef std::vector<std::pair<std::string, std::string> > StringPairs;
|
| +using StringPairs = std::vector<std::pair<std::string, std::string>>;
|
|
|
| // Splits |line| into key value pairs according to the given delimiters and
|
| // removes whitespace leading each key and trailing each value. Returns true
|
| @@ -43,7 +88,12 @@ BASE_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line,
|
| char key_value_pair_delimiter,
|
| StringPairs* key_value_pairs);
|
|
|
| -// The same as SplitString, but use a substring delimiter instead of a char.
|
| +// Similar to SplitString, but use a substring delimiter instead of a list of
|
| +// characters that are all possible delimiters.
|
| +//
|
| +// TODO(brettw) this should probably be changed and expanded to provide a
|
| +// mirror of the SplitString[Piece] API above, just with the different
|
| +// delimiter handling.
|
| BASE_EXPORT void SplitStringUsingSubstr(const string16& str,
|
| const string16& s,
|
| std::vector<string16>* r);
|
| @@ -51,27 +101,53 @@ BASE_EXPORT void SplitStringUsingSubstr(const std::string& str,
|
| const std::string& s,
|
| std::vector<std::string>* r);
|
|
|
| +// -----------------------------------------------------------------------------
|
| +// Backwards-compat wrappers
|
| +//
|
| +// New code should use one of the more general variants above.
|
| +// TODO(brettw) remove these and convert to the versions above.
|
| +
|
| +// Splits |str| into a vector of strings delimited by |c|, placing the results
|
| +// in |r|. If several instances of |c| are contiguous, or if |str| begins with
|
| +// or ends with |c|, then an empty string is inserted.
|
| +//
|
| +// Every substring is trimmed of any leading or trailing white space.
|
| +// NOTE: |c| must be in BMP (Basic Multilingual Plane)
|
| +BASE_EXPORT void SplitString(const string16& str,
|
| + char16 c,
|
| + std::vector<string16>* r);
|
| +
|
| +// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
|
| +// the trailing byte of a multi-byte character can be in the ASCII range.
|
| +// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
|
| +// Note: |c| must be in the ASCII range.
|
| +BASE_EXPORT void SplitString(const std::string& str,
|
| + char c,
|
| + std::vector<std::string>* r);
|
| +
|
| // The same as SplitString, but don't trim white space.
|
| // NOTE: |c| must be in BMP (Basic Multilingual Plane)
|
| -BASE_EXPORT void SplitStringDontTrim(const string16& str,
|
| +BASE_EXPORT void SplitStringDontTrim(StringPiece16 str,
|
| char16 c,
|
| std::vector<string16>* r);
|
| // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
|
| // the trailing byte of a multi-byte character can be in the ASCII range.
|
| // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
|
| // Note: |c| must be in the ASCII range.
|
| -BASE_EXPORT void SplitStringDontTrim(const std::string& str,
|
| +BASE_EXPORT void SplitStringDontTrim(StringPiece str,
|
| char c,
|
| - std::vector<std::string>* r);
|
| + std::vector<std::string>* result);
|
|
|
| -// WARNING: this uses whitespace as defined by the HTML5 spec. If you need
|
| -// a function similar to this but want to trim all types of whitespace, then
|
| -// factor this out into a function that takes a string containing the characters
|
| -// that are treated as whitespace.
|
| +// WARNING: this uses whitespace as defined by the HTML5 spec (ASCII whitespace
|
| +// only).
|
| +//
|
| +// The difference between this and calling SplitString with the whitespace
|
| +// characters as separators is the treatment of the first element when the
|
| +// string starts with whitespace.
|
| //
|
| -// Splits the string along whitespace (where whitespace is the five space
|
| -// characters defined by HTML 5). Each contiguous block of non-whitespace
|
| -// characters is added to result.
|
| +// Input      SplitString     SplitStringAlongWhitespace
|
| +// --------------------------------------------------------
|
| +// " a "      "", "a"         "a"
|
| BASE_EXPORT void SplitStringAlongWhitespace(const string16& str,
|
| std::vector<string16>* result);
|
| BASE_EXPORT void SplitStringAlongWhitespace(const std::string& str,
|
|
|