Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2362)

Unified Diff: base/strings/string_split.h

Issue 1169393003: Add new SplitString backend. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « base/strings/string_piece.h ('k') | base/strings/string_split.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: base/strings/string_split.h
diff --git a/base/strings/string_split.h b/base/strings/string_split.h
index 55d8cb377edfa597c19173b0e1832b3ce478368f..dc4108d327ff2a4653bd58090bfcdc19995150b4 100644
--- a/base/strings/string_split.h
+++ b/base/strings/string_split.h
@@ -11,28 +11,73 @@
#include "base/base_export.h"
#include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
namespace base {
-// Splits |str| into a vector of strings delimited by |c|, placing the results
-// in |r|. If several instances of |c| are contiguous, or if |str| begins with
-// or ends with |c|, then an empty string is inserted.
+enum WhitespaceHandling {
+ KEEP_WHITESPACE,
+ TRIM_WHITESPACE,
+};
+
+enum SplitResult {
+ // Strictly return all results.
+ //
+ // If the input is ",," and the separator is ',', this will return a
+ // vector of three empty strings.
+ SPLIT_WANT_ALL,
+
+ // Only nonempty results will be added to the results. Multiple separators
+ // will be coalesced. Separators at the beginning and end of the input will
+ // be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped.
+ //
+ // If the input is ",," and the separator is ',', this will return an empty
+ // vector.
+ SPLIT_WANT_NONEMPTY,
+};
+
+// Split the given string on ANY of the given separators, returning copies of
+// the result.
//
-// Every substring is trimmed of any leading or trailing white space.
-// NOTE: |c| must be in BMP (Basic Multilingual Plane)
-BASE_EXPORT void SplitString(const string16& str,
- char16 c,
- std::vector<string16>* r);
+// To split on either commas or semicolons, keeping all whitespace:
+//
+// std::vector<std::string> tokens = base::SplitString(
+// input, ",;", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
+BASE_EXPORT std::vector<std::string> SplitString(
+ StringPiece input,
+ StringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type);
+BASE_EXPORT std::vector<string16> SplitString(
+ StringPiece16 input,
+ StringPiece16 separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type);
-// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
-// the trailing byte of a multi-byte character can be in the ASCII range.
-// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
-// Note: |c| must be in the ASCII range.
-BASE_EXPORT void SplitString(const std::string& str,
- char c,
- std::vector<std::string>* r);
+// Like SplitString above except it returns a vector of StringPieces which
+// reference the original buffer without copying. Although you have to be
+// careful to keep the original string alive and unmodified while the results
+// are in use, this provides an efficient way to iterate through tokens.
+//
+// To iterate through all whitespace-separated tokens in an input string:
+//
+// for (const auto& cur :
+// base::SplitStringPiece(input, base::kWhitespaceASCII,
+// base::KEEP_WHITESPACE,
+// base::SPLIT_WANT_NONEMPTY)) {
+// ...
+BASE_EXPORT std::vector<StringPiece> SplitStringPiece(
+ StringPiece input,
+ StringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type);
+BASE_EXPORT std::vector<StringPiece16> SplitStringPiece(
+ StringPiece16 input,
+ StringPiece16 separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type);
-typedef std::vector<std::pair<std::string, std::string> > StringPairs;
+using StringPairs = std::vector<std::pair<std::string, std::string>>;
// Splits |line| into key value pairs according to the given delimiters and
// removes whitespace leading each key and trailing each value. Returns true
@@ -43,7 +88,12 @@ BASE_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line,
char key_value_pair_delimiter,
StringPairs* key_value_pairs);
-// The same as SplitString, but use a substring delimiter instead of a char.
+// Similar to SplitString, but uses a substring delimiter instead of a list of
+// characters that are all possible delimiters.
+//
+// TODO(brettw) this should probably be changed and expanded to provide a
+// mirror of the SplitString[Piece] API above, just with the different
+// delimiter handling.
BASE_EXPORT void SplitStringUsingSubstr(const string16& str,
const string16& s,
std::vector<string16>* r);
@@ -51,27 +101,53 @@ BASE_EXPORT void SplitStringUsingSubstr(const std::string& str,
const std::string& s,
std::vector<std::string>* r);
+// -----------------------------------------------------------------------------
+// Backwards-compat wrappers
+//
+// New code should use one of the more general variants above.
+// TODO(brettw) remove these and convert to the versions above.
+
+// Splits |str| into a vector of strings delimited by |c|, placing the results
+// in |r|. If several instances of |c| are contiguous, or if |str| begins with
+// or ends with |c|, then an empty string is inserted.
+//
+// Every substring is trimmed of any leading or trailing white space.
+// NOTE: |c| must be in BMP (Basic Multilingual Plane)
+BASE_EXPORT void SplitString(const string16& str,
+ char16 c,
+ std::vector<string16>* r);
+
+// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
+// the trailing byte of a multi-byte character can be in the ASCII range.
+// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
+// Note: |c| must be in the ASCII range.
+BASE_EXPORT void SplitString(const std::string& str,
+ char c,
+ std::vector<std::string>* r);
+
// The same as SplitString, but don't trim white space.
// NOTE: |c| must be in BMP (Basic Multilingual Plane)
-BASE_EXPORT void SplitStringDontTrim(const string16& str,
+BASE_EXPORT void SplitStringDontTrim(StringPiece16 str,
char16 c,
std::vector<string16>* r);
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range.
// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
// Note: |c| must be in the ASCII range.
-BASE_EXPORT void SplitStringDontTrim(const std::string& str,
+BASE_EXPORT void SplitStringDontTrim(StringPiece str,
char c,
- std::vector<std::string>* r);
+ std::vector<std::string>* result);
-// WARNING: this uses whitespace as defined by the HTML5 spec. If you need
-// a function similar to this but want to trim all types of whitespace, then
-// factor this out into a function that takes a string containing the characters
-// that are treated as whitespace.
+// WARNING: this uses whitespace as defined by the HTML5 spec (ASCII whitespace
+// only).
+//
+// The difference between this and calling SplitString with the whitespace
+// characters as separators is the treatment of the first element when the
+// string starts with whitespace.
//
-// Splits the string along whitespace (where whitespace is the five space
-// characters defined by HTML 5). Each contiguous block of non-whitespace
-// characters is added to result.
+// Input        SplitString      SplitStringAlongWhitespace
+// --------------------------------------------------------
+// " a "        "", "a"          "a"
BASE_EXPORT void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result);
BASE_EXPORT void SplitStringAlongWhitespace(const std::string& str,
« no previous file with comments | « base/strings/string_piece.h ('k') | base/strings/string_split.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698