Chromium Code Reviews| Index: base/strings/string_split.h |
| diff --git a/base/strings/string_split.h b/base/strings/string_split.h |
| index 55d8cb377edfa597c19173b0e1832b3ce478368f..72e09c4c27ecc264a14cba4b0c264d6559961524 100644 |
| --- a/base/strings/string_split.h |
| +++ b/base/strings/string_split.h |
| @@ -11,26 +11,59 @@ |
| #include "base/base_export.h" |
| #include "base/strings/string16.h" |
| +#include "base/strings/string_piece.h" |
| namespace base { |
| -// Splits |str| into a vector of strings delimited by |c|, placing the results |
| -// in |r|. If several instances of |c| are contiguous, or if |str| begins with |
| -// or ends with |c|, then an empty string is inserted. |
| +enum WhitespaceHandling { |
| + KEEP_WHITESPACE, |
| + TRIM_WHITESPACE, |
| +}; |
| + |
| +enum SplitResult { |
| + // Strictly return all results. |
| + // |
| + // If the input is ",," and the separator is ',', this will return a |
| + // vector of three empty strings. |
| + SPLIT_WANT_ALL, |
| + |
| + // Only nonempty results will be added to the results. Multiple separators |
| + // will be coalesced. Separators at the beginning and end of the input will |
| + // be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped. |
| + // |
| + // If the input is ",," and the separator is ',', this will return an empty |
| + // vector. |
| + SPLIT_WANT_NONEMPTY, |
| +}; |
| + |
| +// Split the given string on ANY of the given separators, returning copies of |
| +// the result. |
| // |
| -// Every substring is trimmed of any leading or trailing white space. |
| -// NOTE: |c| must be in BMP (Basic Multilingual Plane) |
| -BASE_EXPORT void SplitString(const string16& str, |
| - char16 c, |
| - std::vector<string16>* r); |
| +// If the string ends in a separator, that separator is ignored rather than |
| +// being treated as introducing a final empty element. |
| +BASE_EXPORT std::vector<std::string> SplitString( |
| + StringPiece input, |
| + StringPiece separators, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type); |
| +BASE_EXPORT std::vector<string16> SplitString( |
| + StringPiece16 input, |
| + StringPiece16 separators, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type); |
| -// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which |
| -// the trailing byte of a multi-byte character can be in the ASCII range. |
| -// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. |
| -// Note: |c| must be in the ASCII range. |
| -BASE_EXPORT void SplitString(const std::string& str, |
| - char c, |
| - std::vector<std::string>* r); |
| +// As SplitString above except returns a vector of StringPieces which reference |
|
danakj
2015/06/11 23:58:34
reference
|
| +// the original buffer without copying. |
| +BASE_EXPORT std::vector<StringPiece> SplitStringPiece( |
| + StringPiece input, |
| + StringPiece separators, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type); |
| +BASE_EXPORT std::vector<StringPiece16> SplitStringPiece( |
| + StringPiece16 input, |
| + StringPiece16 separators, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type); |
| typedef std::vector<std::pair<std::string, std::string> > StringPairs; |
| @@ -51,27 +84,52 @@ BASE_EXPORT void SplitStringUsingSubstr(const std::string& str, |
| const std::string& s, |
| std::vector<std::string>* r); |
| +// ----------------------------------------------------------------------------- |
| +// Backwards-compat wrappers |
|
danakj
2015/06/11 23:58:35
Wondering, are you planning to remove these?
brettw
2015/06/12 17:37:08
Added a TODO
|
| +// |
| +// New code should use one of the more general variants above. |
| + |
| +// Splits |str| into a vector of strings delimited by |c|, placing the results |
| +// in |r|. If several instances of |c| are contiguous, or if |str| begins with |
| +// or ends with |c|, then an empty string is inserted. |
| +// |
| +// Every substring is trimmed of any leading or trailing white space. |
| +// NOTE: |c| must be in BMP (Basic Multilingual Plane) |
| +BASE_EXPORT void SplitString(const string16& str, |
| + char16 c, |
| + std::vector<string16>* r); |
| + |
| +// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which |
| +// the trailing byte of a multi-byte character can be in the ASCII range. |
| +// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. |
| +// Note: |c| must be in the ASCII range. |
| +BASE_EXPORT void SplitString(const std::string& str, |
| + char c, |
| + std::vector<std::string>* r); |
| + |
| // The same as SplitString, but doesn't trim white space. |
| // NOTE: |c| must be in BMP (Basic Multilingual Plane) |
| -BASE_EXPORT void SplitStringDontTrim(const string16& str, |
| +BASE_EXPORT void SplitStringDontTrim(StringPiece16 str, |
| char16 c, |
| std::vector<string16>* r); |
| // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which |
| // the trailing byte of a multi-byte character can be in the ASCII range. |
| // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. |
| // Note: |c| must be in the ASCII range. |
| -BASE_EXPORT void SplitStringDontTrim(const std::string& str, |
| +BASE_EXPORT void SplitStringDontTrim(StringPiece str, |
| char c, |
| - std::vector<std::string>* r); |
| + std::vector<std::string>* result); |
| -// WARNING: this uses whitespace as defined by the HTML5 spec. If you need |
| -// a function similar to this but want to trim all types of whitespace, then |
| -// factor this out into a function that takes a string containing the characters |
| -// that are treated as whitespace. |
| +// WARNING: this uses whitespace as defined by the HTML5 spec (ASCII whitespace |
| +// only). |
| +// |
| +// The difference between this and calling SplitString with the whitespace |
| +// characters as separators is the treatment of the first element when the |
| +// string starts with whitespace. |
| // |
| -// Splits the string along whitespace (where whitespace is the five space |
| -// characters defined by HTML 5). Each contiguous block of non-whitespace |
| -// characters is added to result. |
| +// Input SplitString SplitStringAlongWhitespace |
| +// -------------------------------------------------------- |
| +// " a " "", "a" "a" |
| BASE_EXPORT void SplitStringAlongWhitespace(const string16& str, |
| std::vector<string16>* result); |
| BASE_EXPORT void SplitStringAlongWhitespace(const std::string& str, |