Index: base/strings/string_split.h |
diff --git a/base/strings/string_split.h b/base/strings/string_split.h |
index 55d8cb377edfa597c19173b0e1832b3ce478368f..1f2057172de90140958b32134cb462f41e46c149 100644 |
--- a/base/strings/string_split.h |
+++ b/base/strings/string_split.h |
@@ -11,28 +11,71 @@ |
#include "base/base_export.h" |
#include "base/strings/string16.h" |
+#include "base/strings/string_piece.h" |
namespace base { |
-// Splits |str| into a vector of strings delimited by |c|, placing the results |
-// in |r|. If several instances of |c| are contiguous, or if |str| begins with |
-// or ends with |c|, then an empty string is inserted. |
+enum WhitespaceHandling { |
+ KEEP_WHITESPACE, |
+ TRIM_WHITESPACE, |
+}; |
+ |
+enum SplitResult { |
+ // Strictly return all results. |
+ // |
+ // If the input is ",," and the separator is ',', this will return a |
+ // vector of three empty strings. |
+ SPLIT_WANT_ALL, |
+ |
+ // Only nonempty results will be added to the results. Multiple separators |
+ // will be coalesced. Separators at the beginning and end of the input will |
+ // be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped. |
+ // |
+ // If the input is ",," and the separator is ',', this will return an empty |
+ // vector. |
+ SPLIT_WANT_NONEMPTY, |
+}; |
+ |
+// Split the given string on ANY of the given separators, returning copies of |
+// the result. |
// |
-// Every substring is trimmed of any leading or trailing white space. |
-// NOTE: |c| must be in BMP (Basic Multilingual Plane) |
-BASE_EXPORT void SplitString(const string16& str, |
- char16 c, |
- std::vector<string16>* r); |
+// To split on either commas or semicolons, keeping all whitespace: |
+// |
+// std::vector<std::string> tokens = base::SplitString( |
+// input, ",;", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL); |
+BASE_EXPORT std::vector<std::string> SplitString(StringPiece input, |
+ StringPiece separators, |
+ WhitespaceHandling whitespace, |
+ SplitResult result_type); |
+BASE_EXPORT std::vector<string16> SplitString(StringPiece16 input, |
+ StringPiece16 separators, |
+ WhitespaceHandling whitespace, |
+ SplitResult result_type); |
-// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which |
-// the trailing byte of a multi-byte character can be in the ASCII range. |
-// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. |
-// Note: |c| must be in the ASCII range. |
-BASE_EXPORT void SplitString(const std::string& str, |
- char c, |
- std::vector<std::string>* r); |
+// Like SplitString above except it returns a vector of StringPieces which |
+// reference the original buffer without copying. You must keep the original |
+// string alive and unmodified while the pieces are in use, but this provides |
+// an efficient way to iterate through tokens in a string. |
+// |
+// To iterate through all whitespace-separated tokens in an input string: |
+// |
+//   for (const auto& cur : base::SplitStringPiece( |
+//            input, base::kWhitespaceASCII, base::KEEP_WHITESPACE, |
+//            base::SPLIT_WANT_NONEMPTY)) { |
+//     ... |
+//   } |
+BASE_EXPORT std::vector<StringPiece> SplitStringPiece( |
+ StringPiece input, |
+ StringPiece separators, |
+ WhitespaceHandling whitespace, |
+ SplitResult result_type); |
+BASE_EXPORT std::vector<StringPiece16> SplitStringPiece( |
+ StringPiece16 input, |
+ StringPiece16 separators, |
+ WhitespaceHandling whitespace, |
+ SplitResult result_type); |
-typedef std::vector<std::pair<std::string, std::string> > StringPairs; |
+using StringPairs = std::vector<std::pair<std::string, std::string>>; |
// Splits |line| into key value pairs according to the given delimiters and |
// removes whitespace leading each key and trailing each value. Returns true |
@@ -43,7 +86,12 @@ BASE_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line, |
char key_value_pair_delimiter, |
StringPairs* key_value_pairs); |
-// The same as SplitString, but use a substring delimiter instead of a char. |
+// Similar to SplitString, but uses a substring delimiter instead of a list of |
+// characters that are all possible delimiters. |
+// |
+// TODO(brettw) this should probably be changed and expanded to provide a |
+// mirror of the SplitString[Piece] API above, just with the different |
+// delimiter handling. |
BASE_EXPORT void SplitStringUsingSubstr(const string16& str, |
const string16& s, |
std::vector<string16>* r); |
@@ -51,27 +99,53 @@ BASE_EXPORT void SplitStringUsingSubstr(const std::string& str, |
const std::string& s, |
std::vector<std::string>* r); |
+// ----------------------------------------------------------------------------- |
+// Backwards-compat wrappers |
+// |
+// New code should use one of the more general variants above. |
+// TODO(brettw) remove these and convert to the versions above. |
+ |
+// Splits |str| into a vector of strings delimited by |c|, placing the results |
+// in |r|. If several instances of |c| are contiguous, or if |str| begins with |
+// or ends with |c|, then an empty string is inserted. |
+// |
+// Every substring is trimmed of any leading or trailing white space. |
+// NOTE: |c| must be in BMP (Basic Multilingual Plane) |
+BASE_EXPORT void SplitString(const string16& str, |
+ char16 c, |
+ std::vector<string16>* r); |
+ |
+// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which |
+// the trailing byte of a multi-byte character can be in the ASCII range. |
+// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. |
+// Note: |c| must be in the ASCII range. |
+BASE_EXPORT void SplitString(const std::string& str, |
+ char c, |
+ std::vector<std::string>* r); |
+ |
// The same as SplitString, but don't trim white space. |
// NOTE: |c| must be in BMP (Basic Multilingual Plane) |
-BASE_EXPORT void SplitStringDontTrim(const string16& str, |
+BASE_EXPORT void SplitStringDontTrim(StringPiece16 str, |
char16 c, |
std::vector<string16>* r); |
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which |
// the trailing byte of a multi-byte character can be in the ASCII range. |
// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. |
// Note: |c| must be in the ASCII range. |
-BASE_EXPORT void SplitStringDontTrim(const std::string& str, |
+BASE_EXPORT void SplitStringDontTrim(StringPiece str, |
char c, |
- std::vector<std::string>* r); |
+ std::vector<std::string>* result); |
-// WARNING: this uses whitespace as defined by the HTML5 spec. If you need |
-// a function similar to this but want to trim all types of whitespace, then |
-// factor this out into a function that takes a string containing the characters |
-// that are treated as whitespace. |
+// WARNING: this uses whitespace as defined by the HTML5 spec (ASCII whitespace |
+// only). |
+// |
+// The difference between this and calling SplitString with the whitespace |
+// characters as separators is the treatment of the first element when the |
+// string starts with whitespace. |
// |
-// Splits the string along whitespace (where whitespace is the five space |
-// characters defined by HTML 5). Each contiguous block of non-whitespace |
-// characters is added to result. |
+// Input SplitString SplitStringAlongWhitespace |
+// -------------------------------------------------------- |
+// " a " "", "a" "a" |
BASE_EXPORT void SplitStringAlongWhitespace(const string16& str, |
std::vector<string16>* result); |
BASE_EXPORT void SplitStringAlongWhitespace(const std::string& str, |