Chromium Code Reviews| Index: base/strings/string_split.h |
| diff --git a/base/strings/string_split.h b/base/strings/string_split.h |
| index 55d8cb377edfa597c19173b0e1832b3ce478368f..59708ab5648b62ee89ac49d2f589ce513566d80f 100644 |
| --- a/base/strings/string_split.h |
| +++ b/base/strings/string_split.h |
| @@ -11,28 +11,73 @@ |
| #include "base/base_export.h" |
| #include "base/strings/string16.h" |
| +#include "base/strings/string_piece.h" |
| namespace base { |
| -// Splits |str| into a vector of strings delimited by |c|, placing the results |
| -// in |r|. If several instances of |c| are contiguous, or if |str| begins with |
| -// or ends with |c|, then an empty string is inserted. |
| +enum WhitespaceHandling { |
| + KEEP_WHITESPACE, |
| + TRIM_WHITESPACE, |
| +}; |
| + |
| +enum SplitResult { |
| + // Strictly return all results. |
| + // |
| + // If the input is ",," and the separator is ',' this will return a |
|
danakj
2015/06/12 17:58:12
thanks this is nicer
|
| + // vector of three empty strings. |
| + SPLIT_WANT_ALL, |
| + |
| + // Only nonempty results will be added to the results. Multiple separators |
| + // will be coalesced. Separators at the beginning and end of the input will |
| + // be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped. |
| + // |
| + // If the input is ",," and the separator is ',', this will return an empty |
| + // vector. |
| + SPLIT_WANT_NONEMPTY, |
| +}; |
| + |
| +// Split the given string on ANY of the given separators, returning copies of |
| +// the result. |
| // |
| -// Every substring is trimmed of any leading or trailing white space. |
| -// NOTE: |c| must be in BMP (Basic Multilingual Plane) |
| -BASE_EXPORT void SplitString(const string16& str, |
| - char16 c, |
| - std::vector<string16>* r); |
| +// To split on either commas or semicolons, keeping all whitespace: |
| +// |
| +// std::vector<std::string> tokens = base::SplitString( |
| +// input, ",;", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL); |
| +BASE_EXPORT std::vector<std::string> SplitString( |
| + StringPiece input, |
| + StringPiece separators, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type); |
| +BASE_EXPORT std::vector<string16> SplitString( |
| + StringPiece16 input, |
| + StringPiece16 separators, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type); |
| -// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which |
| -// the trailing byte of a multi-byte character can be in the ASCII range. |
| -// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. |
| -// Note: |c| must be in the ASCII range. |
| -BASE_EXPORT void SplitString(const std::string& str, |
| - char c, |
| - std::vector<std::string>* r); |
| +// Like SplitString above except it returns a vector of StringPieces which |
| +// reference the original buffer without copying. Although you have to be |
| +// careful to keep the original string unmodified, this provides an efficient |
| +// way to iterate through tokens in a string. |
| +// |
| +// To iterate through all whitespace-separated tokens in an input string: |
| +// |
| +// for (const auto& cur : |
| +// base::SplitStringPiece(input, base::kWhitespaceASCII, |
| +// base::KEEP_WHITESPACE, |
|
danakj
2015/06/12 17:58:12
Just to sanity check, KEEP or TRIM whitespace here
brettw
2015/06/12 18:09:45
Correct. I wrote this since then the code won't ev
|
| +// base::SPLIT_WANT_NONEMPTY)) { |
| +// ... |
| +BASE_EXPORT std::vector<StringPiece> SplitStringPiece( |
| + StringPiece input, |
| + StringPiece separators, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type); |
| +BASE_EXPORT std::vector<StringPiece16> SplitStringPiece( |
| + StringPiece16 input, |
| + StringPiece16 separators, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type); |
| -typedef std::vector<std::pair<std::string, std::string> > StringPairs; |
| +typedef std::vector<std::pair<std::string, std::string>> StringPairs; |
|
danakj
2015/06/12 17:58:12
nit: while you're changing this you could make it
brettw
2015/06/12 18:09:45
Sure.
|
| // Splits |line| into key value pairs according to the given delimiters and |
| // removes whitespace leading each key and trailing each value. Returns true |
| @@ -43,7 +88,12 @@ BASE_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line, |
| char key_value_pair_delimiter, |
| StringPairs* key_value_pairs); |
| -// The same as SplitString, but use a substring delimiter instead of a char. |
| +// Similar to SplitString, but uses a substring delimiter instead of a list of |
| +// characters that are all possible delimiters. |
| +// |
| +// TODO(brettw) this should probably be changed and expanded to provide a |
| +// mirror of the SplitString[Piece] API above, just with the different |
| +// delimiter handling. |
| BASE_EXPORT void SplitStringUsingSubstr(const string16& str, |
| const string16& s, |
| std::vector<string16>* r); |
| @@ -51,27 +101,53 @@ BASE_EXPORT void SplitStringUsingSubstr(const std::string& str, |
| const std::string& s, |
| std::vector<std::string>* r); |
| +// ----------------------------------------------------------------------------- |
| +// Backwards-compat wrappers |
| +// |
| +// New code should use one of the more general variants above. |
| +// TODO(brettw) remove these and convert to the versions above. |
| + |
| +// Splits |str| into a vector of strings delimited by |c|, placing the results |
| +// in |r|. If several instances of |c| are contiguous, or if |str| begins with |
| +// or ends with |c|, then an empty string is inserted. |
| +// |
| +// Every substring is trimmed of any leading or trailing white space. |
| +// NOTE: |c| must be in BMP (Basic Multilingual Plane) |
| +BASE_EXPORT void SplitString(const string16& str, |
| + char16 c, |
| + std::vector<string16>* r); |
| + |
| +// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which |
| +// the trailing byte of a multi-byte character can be in the ASCII range. |
| +// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. |
| +// Note: |c| must be in the ASCII range. |
| +BASE_EXPORT void SplitString(const std::string& str, |
| + char c, |
| + std::vector<std::string>* r); |
| + |
| // The same as SplitString, but don't trim white space. |
| // NOTE: |c| must be in BMP (Basic Multilingual Plane) |
| -BASE_EXPORT void SplitStringDontTrim(const string16& str, |
| +BASE_EXPORT void SplitStringDontTrim(StringPiece16 str, |
| char16 c, |
| std::vector<string16>* r); |
| // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which |
| // the trailing byte of a multi-byte character can be in the ASCII range. |
| // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. |
| // Note: |c| must be in the ASCII range. |
| -BASE_EXPORT void SplitStringDontTrim(const std::string& str, |
| +BASE_EXPORT void SplitStringDontTrim(StringPiece str, |
| char c, |
| - std::vector<std::string>* r); |
| + std::vector<std::string>* result); |
| -// WARNING: this uses whitespace as defined by the HTML5 spec. If you need |
| -// a function similar to this but want to trim all types of whitespace, then |
| -// factor this out into a function that takes a string containing the characters |
| -// that are treated as whitespace. |
| +// WARNING: this uses whitespace as defined by the HTML5 spec (ASCII whitespace |
| +// only). |
| +// |
| +// The difference between this and calling SplitString with the whitespace |
| +// characters as separators is the treatment of the first element when the |
| +// string starts with whitespace. |
| // |
| -// Splits the string along whitespace (where whitespace is the five space |
| -// characters defined by HTML 5). Each contiguous block of non-whitespace |
| -// characters is added to result. |
| +// Input        SplitString     SplitStringAlongWhitespace |
| +// -------------------------------------------------------- |
| +// " a "        "", "a"         "a" |
| BASE_EXPORT void SplitStringAlongWhitespace(const string16& str, |
| std::vector<string16>* result); |
| BASE_EXPORT void SplitStringAlongWhitespace(const std::string& str, |