Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(512)

Unified Diff: base/strings/string_split.cc

Issue 1169393003: Add new SplitString backend. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « base/strings/string_split.h ('k') | base/strings/string_split_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: base/strings/string_split.cc
diff --git a/base/strings/string_split.cc b/base/strings/string_split.cc
index 88a623664fcc1c256de60c110c3186ecc0034885..e23ce3fa039a087b3cd902aab55a18520599d9ae 100644
--- a/base/strings/string_split.cc
+++ b/base/strings/string_split.cc
@@ -12,26 +12,91 @@ namespace base {
namespace {
-template <typename STR>
-void SplitStringT(const STR& str,
- const typename STR::value_type s,
- bool trim_whitespace,
- std::vector<STR>* r) {
- r->clear();
- size_t last = 0;
- size_t c = str.size();
- for (size_t i = 0; i <= c; ++i) {
- if (i == c || str[i] == s) {
- STR tmp(str, last, i - last);
- if (trim_whitespace)
- TrimWhitespace(tmp, TRIM_ALL, &tmp);
- // Avoid converting an empty or all-whitespace source string into a vector
- // of one empty string.
- if (i != c || !r->empty() || !tmp.empty())
- r->push_back(tmp);
- last = i + 1;
+// PieceToOutputType converts a StringPiece as needed to a given output type,
+// which is either the same type of StringPiece (a NOP) or the corresponding
+// non-piece string type.
+//
+// The default converter is a NOP; it works when the OutputType is the
+// correct StringPiece.
+template<typename Str, typename OutputType>
+OutputType PieceToOutputType(BasicStringPiece<Str> piece) {
+ return piece;
+}
+template<> // Convert StringPiece to std::string
+std::string PieceToOutputType<std::string, std::string>(StringPiece piece) {
+ return piece.as_string();
+}
+template<> // Convert StringPiece16 to string16.
+string16 PieceToOutputType<string16, string16>(StringPiece16 piece) {
+ return piece.as_string();
+}
+
+// Returns either the ASCII or UTF-16 whitespace.
+template<typename Str> BasicStringPiece<Str> WhitespaceForType();
+template<> StringPiece16 WhitespaceForType<string16>() {
+ return kWhitespaceUTF16;
+}
+template<> StringPiece WhitespaceForType<std::string>() {
+ return kWhitespaceASCII;
+}
+
+// Optimize the single-character case to call find() on the string instead,
+// since this is the common case and can be made faster. This could have been
+// done with template specialization too, but would have been less clear.
+//
+// There is no corresponding FindFirstNotOf because StringPiece already
+// implements these different versions that do the optimized searching.
+size_t FindFirstOf(StringPiece piece, char c, size_t pos) {
+ return piece.find(c, pos);
+}
+size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) {
+ return piece.find(c, pos);
+}
+size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) {
+ return piece.find_first_of(one_of, pos);
+}
+size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) {
+ return piece.find_first_of(one_of, pos);
+}
+
+// General string splitter template. Can take 8- or 16-bit input, can produce
+// the corresponding string or StringPiece output, and can take single- or
+// multiple-character delimiters.
+//
+// DelimiterType is either a character (Str::value_type) or a string piece of
+// multiple characters (BasicStringPiece<Str>). StringPiece has a version of
+// find for both of these cases, and the single-character version is the most
+// common and can be implemented faster, which is why this is a template.
+template<typename Str, typename OutputStringType, typename DelimiterType>
+static std::vector<OutputStringType> SplitStringT(
+ BasicStringPiece<Str> str,
+ DelimiterType delimiter,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ std::vector<OutputStringType> result;
+ if (str.empty())
+ return result;
+
+ size_t start = 0;
+ while (start != Str::npos) {
+ size_t end = FindFirstOf(str, delimiter, start);
+
+ BasicStringPiece<Str> piece;
+ if (end == Str::npos) {
+ piece = str.substr(start);
+ start = Str::npos;
+ } else {
+ piece = str.substr(start, end - start);
+ start = end + 1;
}
+
+ if (whitespace == TRIM_WHITESPACE)
+ piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL);
+
+ if (result_type == SPLIT_WANT_ALL || !piece.empty())
+ result.push_back(PieceToOutputType<Str, OutputStringType>(piece));
}
+ return result;
}
bool SplitStringIntoKeyValue(const std::string& line,
@@ -62,8 +127,8 @@ bool SplitStringIntoKeyValue(const std::string& line,
template <typename STR>
void SplitStringUsingSubstrT(const STR& str,
- const STR& s,
- std::vector<STR>* r) {
+ const STR& s,
+ std::vector<STR>* r) {
r->clear();
typename STR::size_type begin_index = 0;
while (true) {
@@ -83,64 +148,89 @@ void SplitStringUsingSubstrT(const STR& str,
}
}
-template<typename STR>
-void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
- result->clear();
- const size_t length = str.length();
- if (!length)
- return;
-
- bool last_was_ws = false;
- size_t last_non_ws_start = 0;
- for (size_t i = 0; i < length; ++i) {
- switch (str[i]) {
- // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR.
- case L' ':
- case L'\t':
- case L'\xA':
- case L'\xB':
- case L'\xC':
- case L'\xD':
- if (!last_was_ws) {
- if (i > 0) {
- result->push_back(
- str.substr(last_non_ws_start, i - last_non_ws_start));
- }
- last_was_ws = true;
- }
- break;
-
- default: // Not a space character.
- if (last_was_ws) {
- last_was_ws = false;
- last_non_ws_start = i;
- }
- break;
- }
+} // namespace
+
+std::vector<std::string> SplitString(StringPiece input,
+ StringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ if (separators.size() == 1) {
+ return SplitStringT<std::string, std::string, char>(
+ input, separators[0], whitespace, result_type);
}
- if (!last_was_ws) {
- result->push_back(
- str.substr(last_non_ws_start, length - last_non_ws_start));
+ return SplitStringT<std::string, std::string, StringPiece>(
+ input, separators, whitespace, result_type);
+}
+
+std::vector<string16> SplitString(StringPiece16 input,
+ StringPiece16 separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ if (separators.size() == 1) {
+ return SplitStringT<string16, string16, char16>(
+ input, separators[0], whitespace, result_type);
}
+ return SplitStringT<string16, string16, StringPiece16>(
+ input, separators, whitespace, result_type);
}
-} // namespace
+std::vector<StringPiece> SplitStringPiece(StringPiece input,
+ StringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ if (separators.size() == 1) {
+ return SplitStringT<std::string, StringPiece, char>(
+ input, separators[0], whitespace, result_type);
+ }
+ return SplitStringT<std::string, StringPiece, StringPiece>(
+ input, separators, whitespace, result_type);
+}
+
+std::vector<StringPiece16> SplitStringPiece(StringPiece16 input,
+ StringPiece16 separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ if (separators.size() == 1) {
+ return SplitStringT<string16, StringPiece16, char16>(
+ input, separators[0], whitespace, result_type);
+ }
+ return SplitStringT<string16, StringPiece16, StringPiece16>(
+ input, separators, whitespace, result_type);
+}
void SplitString(const string16& str,
char16 c,
- std::vector<string16>* r) {
+ std::vector<string16>* result) {
DCHECK(CBU16_IS_SINGLE(c));
- SplitStringT(str, c, true, r);
+ *result = SplitStringT<string16, string16, char16>(
+ str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL);
+
+ // Backward-compat hack: The old SplitString implementation would keep
+ // empty substrings, for example:
+ // "a,,b" -> ["a", "", "b"]
+ // "a, ,b" -> ["a", "", "b"]
+ // which the current code also does. But the old one would discard the empty
+ // substring when it was the only result:
+ // " " -> []
+ // In the latter case, our new code will give [""].
+ if (result->size() == 1 && (*result)[0].empty())
+ result->clear();
}
void SplitString(const std::string& str,
char c,
- std::vector<std::string>* r) {
+ std::vector<std::string>* result) {
#if CHAR_MIN < 0
DCHECK_GE(c, 0);
#endif
DCHECK_LT(c, 0x7F);
- SplitStringT(str, c, true, r);
+ *result = SplitStringT<std::string, std::string, char>(
+ str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL);
+
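+ // Backward-compat hack: the old implementation returned no results when the
+ // only result was an empty string. See the string16 overload above.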
+ if (result->size() == 1 && (*result)[0].empty())
+ result->clear();
+
}
bool SplitStringIntoKeyValuePairs(const std::string& line,
@@ -182,31 +272,37 @@ void SplitStringUsingSubstr(const std::string& str,
SplitStringUsingSubstrT(str, s, r);
}
-void SplitStringDontTrim(const string16& str,
+void SplitStringDontTrim(StringPiece16 str,
char16 c,
- std::vector<string16>* r) {
+ std::vector<string16>* result) {
DCHECK(CBU16_IS_SINGLE(c));
- SplitStringT(str, c, false, r);
+ *result = SplitStringT<string16, string16, char16>(
+ str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL);
}
-void SplitStringDontTrim(const std::string& str,
+void SplitStringDontTrim(StringPiece str,
char c,
- std::vector<std::string>* r) {
+ std::vector<std::string>* result) {
#if CHAR_MIN < 0
DCHECK_GE(c, 0);
#endif
DCHECK_LT(c, 0x7F);
- SplitStringT(str, c, false, r);
+ *result = SplitStringT<std::string, std::string, char>(
+ str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL);
}
void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result) {
- SplitStringAlongWhitespaceT(str, result);
+ *result = SplitStringT<string16, string16, StringPiece16>(
+ str, StringPiece16(kWhitespaceASCIIAs16),
+ TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
}
void SplitStringAlongWhitespace(const std::string& str,
std::vector<std::string>* result) {
- SplitStringAlongWhitespaceT(str, result);
+ *result = SplitStringT<std::string, std::string, StringPiece>(
+ str, StringPiece(kWhitespaceASCII),
+ TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
}
} // namespace base
« no previous file with comments | « base/strings/string_split.h ('k') | base/strings/string_split_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698