Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(189)

Unified Diff: base/strings/string_split.h

Issue 1169393003: Add new SplitString backend. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: base/strings/string_split.h
diff --git a/base/strings/string_split.h b/base/strings/string_split.h
index 55d8cb377edfa597c19173b0e1832b3ce478368f..72e09c4c27ecc264a14cba4b0c264d6559961524 100644
--- a/base/strings/string_split.h
+++ b/base/strings/string_split.h
@@ -11,26 +11,59 @@
#include "base/base_export.h"
#include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
namespace base {
-// Splits |str| into a vector of strings delimited by |c|, placing the results
-// in |r|. If several instances of |c| are contiguous, or if |str| begins with
-// or ends with |c|, then an empty string is inserted.
+enum WhitespaceHandling {
+ KEEP_WHITESPACE,
+ TRIM_WHITESPACE,
+};
+
+enum SplitResult {
+ // Strictly return all results.
+ //
+ // If the input is ",," and the separator is ',', this will return a
+ // vector of three empty strings.
+ SPLIT_WANT_ALL,
+
+ // Only nonempty results will be added to the results. Multiple separators
+ // will be coalesced. Separators at the beginning and end of the input will
+ // be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped.
+ //
+ // If the input is ",," and the separator is ',', this will return an empty
+ // vector.
+ SPLIT_WANT_NONEMPTY,
+};
+
+// Split the given string on ANY of the given separators, returning copies of
+// the result.
//
-// Every substring is trimmed of any leading or trailing white space.
-// NOTE: |c| must be in BMP (Basic Multilingual Plane)
-BASE_EXPORT void SplitString(const string16& str,
- char16 c,
- std::vector<string16>* r);
+// If the string ends in a separator, it will be ignored rather than treating
+// that as a last empty element.
+BASE_EXPORT std::vector<std::string> SplitString(
+ StringPiece input,
+ StringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type);
+BASE_EXPORT std::vector<string16> SplitString(
+ StringPiece16 input,
+ StringPiece16 separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type);
-// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
-// the trailing byte of a multi-byte character can be in the ASCII range.
-// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
-// Note: |c| must be in the ASCII range.
-BASE_EXPORT void SplitString(const std::string& str,
- char c,
- std::vector<std::string>* r);
+// As SplitString above except returns a vector of StringPieces which referece
danakj 2015/06/11 23:58:34 reference
+// the original buffer without copying.
+BASE_EXPORT std::vector<StringPiece> SplitStringPiece(
+ StringPiece input,
+ StringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type);
+BASE_EXPORT std::vector<StringPiece16> SplitStringPiece(
+ StringPiece16 input,
+ StringPiece16 separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type);
typedef std::vector<std::pair<std::string, std::string> > StringPairs;
@@ -51,27 +84,52 @@ BASE_EXPORT void SplitStringUsingSubstr(const std::string& str,
const std::string& s,
std::vector<std::string>* r);
+// -----------------------------------------------------------------------------
+// Backwards-compat wrappers
danakj 2015/06/11 23:58:35 Wondering, are you planning to remove these?
brettw 2015/06/12 17:37:08 Added a TODO
+//
+// New code should use one of the more general variants above.
+
+// Splits |str| into a vector of strings delimited by |c|, placing the results
+// in |r|. If several instances of |c| are contiguous, or if |str| begins with
+// or ends with |c|, then an empty string is inserted.
+//
+// Every substring is trimmed of any leading or trailing white space.
+// NOTE: |c| must be in BMP (Basic Multilingual Plane)
+BASE_EXPORT void SplitString(const string16& str,
+ char16 c,
+ std::vector<string16>* r);
+
+// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
+// the trailing byte of a multi-byte character can be in the ASCII range.
+// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
+// Note: |c| must be in the ASCII range.
+BASE_EXPORT void SplitString(const std::string& str,
+ char c,
+ std::vector<std::string>* r);
+
// The same as SplitString, but don't trim white space.
// NOTE: |c| must be in BMP (Basic Multilingual Plane)
-BASE_EXPORT void SplitStringDontTrim(const string16& str,
+BASE_EXPORT void SplitStringDontTrim(StringPiece16 str,
char16 c,
std::vector<string16>* r);
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range.
// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
// Note: |c| must be in the ASCII range.
-BASE_EXPORT void SplitStringDontTrim(const std::string& str,
+BASE_EXPORT void SplitStringDontTrim(StringPiece str,
char c,
- std::vector<std::string>* r);
+ std::vector<std::string>* result);
-// WARNING: this uses whitespace as defined by the HTML5 spec. If you need
-// a function similar to this but want to trim all types of whitespace, then
-// factor this out into a function that takes a string containing the characters
-// that are treated as whitespace.
+// WARNING: this uses whitespace as defined by the HTML5 spec (ASCII whitespace
+// only).
+//
+// The difference between this and calling SplitString with the whitespace
+// characters as separators is the treatment of the first element when the
+// string starts with whitespace.
//
-// Splits the string along whitespace (where whitespace is the five space
-// characters defined by HTML 5). Each contiguous block of non-whitespace
-// characters is added to result.
+// Input SplitString SplitStringAlongWhitespace
+// --------------------------------------------------------
+// " a " "", "a" "a"
BASE_EXPORT void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result);
BASE_EXPORT void SplitStringAlongWhitespace(const std::string& str,

Powered by Google App Engine
This is Rietveld 408576698