Index: base/strings/string_split.cc |
diff --git a/base/strings/string_split.cc b/base/strings/string_split.cc |
index 88a623664fcc1c256de60c110c3186ecc0034885..8998c812c9a415042b750b1d3edf1349a53f7b53 100644 |
--- a/base/strings/string_split.cc |
+++ b/base/strings/string_split.cc |
@@ -12,26 +12,93 @@ namespace base { |
namespace { |
-template <typename STR> |
-void SplitStringT(const STR& str, |
- const typename STR::value_type s, |
- bool trim_whitespace, |
- std::vector<STR>* r) { |
- r->clear(); |
- size_t last = 0; |
- size_t c = str.size(); |
- for (size_t i = 0; i <= c; ++i) { |
- if (i == c || str[i] == s) { |
- STR tmp(str, last, i - last); |
- if (trim_whitespace) |
- TrimWhitespace(tmp, TRIM_ALL, &tmp); |
- // Avoid converting an empty or all-whitespace source string into a vector |
- // of one empty string. |
- if (i != c || !r->empty() || !tmp.empty()) |
- r->push_back(tmp); |
- last = i + 1; |
+// PieceToOutputType converts a StringPiece as needed to a given output type, |
+// which is either the same type of StringPiece (a NOP) or the corresponding |
+// non-piece string type. |
+// |
+// The default converter is a NOP; it works when the OutputType is already |
+// the correct StringPiece. |
+template <typename Str, typename OutputType> |
+OutputType PieceToOutputType(BasicStringPiece<Str> piece) { |
+ return piece; |
+} |
+template <> // Convert StringPiece to std::string |
+std::string PieceToOutputType<std::string, std::string>(StringPiece piece) { |
+ return piece.as_string(); |
+} |
+template <> // Convert StringPiece16 to string16. |
+string16 PieceToOutputType<string16, string16>(StringPiece16 piece) { |
+ return piece.as_string(); |
+} |
+ |
+// Returns the whitespace characters for Str: either ASCII or UTF-16. |
+template <typename Str> |
+BasicStringPiece<Str> WhitespaceForType(); |
+template <> |
+StringPiece16 WhitespaceForType<string16>() { |
+ return kWhitespaceUTF16; |
+} |
+template <> |
+StringPiece WhitespaceForType<std::string>() { |
+ return kWhitespaceASCII; |
+} |
+ |
+// Optimize the single-character case to call find() on the string instead, |
+// since this is the common case and can be made faster. This could have been |
+// done with template specialization too, but would have been less clear. |
+// |
+// There is no corresponding FindFirstNotOf because StringPiece already |
+// implements these different versions that do the optimized searching. |
+size_t FindFirstOf(StringPiece piece, char c, size_t pos) { |
+ return piece.find(c, pos); |
+} |
+size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) { |
+ return piece.find(c, pos); |
+} |
+size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) { |
+ return piece.find_first_of(one_of, pos); |
+} |
+size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) { |
+ return piece.find_first_of(one_of, pos); |
+} |
+ |
+// General string splitter template. Can take 8- or 16-bit input, can produce |
+// the corresponding string or StringPiece output, and can take single- or |
+// multiple-character delimiters. |
+// |
+// DelimiterType is either a character (Str::value_type) or a string piece of |
+// multiple characters (BasicStringPiece<Str>). StringPiece has a version of |
+// find for both of these cases, and the single-character version is the most |
+// common and can be implemented faster, which is why this is a template. |
+template <typename Str, typename OutputStringType, typename DelimiterType> |
+static std::vector<OutputStringType> SplitStringT(BasicStringPiece<Str> str, |
+ DelimiterType delimiter, |
+ WhitespaceHandling whitespace, |
+ SplitResult result_type) { |
+ std::vector<OutputStringType> result; |
+ if (str.empty()) |
+ return result; |
+ |
+ size_t start = 0; |
+ while (start != Str::npos) { |
+ size_t end = FindFirstOf(str, delimiter, start); |
+ |
+ BasicStringPiece<Str> piece; |
+ if (end == Str::npos) { |
+ piece = str.substr(start); |
+ start = Str::npos; |
+ } else { |
+ piece = str.substr(start, end - start); |
+ start = end + 1; |
} |
+ |
+ if (whitespace == TRIM_WHITESPACE) |
+ piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); |
+ |
+ if (result_type == SPLIT_WANT_ALL || !piece.empty()) |
+ result.push_back(PieceToOutputType<Str, OutputStringType>(piece)); |
} |
+ return result; |
} |
bool SplitStringIntoKeyValue(const std::string& line, |
@@ -62,8 +129,8 @@ bool SplitStringIntoKeyValue(const std::string& line, |
template <typename STR> |
void SplitStringUsingSubstrT(const STR& str, |
- const STR& s, |
- std::vector<STR>* r) { |
+ const STR& s, |
+ std::vector<STR>* r) { |
r->clear(); |
typename STR::size_type begin_index = 0; |
while (true) { |
@@ -83,64 +150,86 @@ void SplitStringUsingSubstrT(const STR& str, |
} |
} |
-template<typename STR> |
-void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) { |
- result->clear(); |
- const size_t length = str.length(); |
- if (!length) |
- return; |
- |
- bool last_was_ws = false; |
- size_t last_non_ws_start = 0; |
- for (size_t i = 0; i < length; ++i) { |
- switch (str[i]) { |
- // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR. |
- case L' ': |
- case L'\t': |
- case L'\xA': |
- case L'\xB': |
- case L'\xC': |
- case L'\xD': |
- if (!last_was_ws) { |
- if (i > 0) { |
- result->push_back( |
- str.substr(last_non_ws_start, i - last_non_ws_start)); |
- } |
- last_was_ws = true; |
- } |
- break; |
- |
- default: // Not a space character. |
- if (last_was_ws) { |
- last_was_ws = false; |
- last_non_ws_start = i; |
- } |
- break; |
- } |
+} // namespace |
+ |
+std::vector<std::string> SplitString(StringPiece input, |
+ StringPiece separators, |
+ WhitespaceHandling whitespace, |
+ SplitResult result_type) { |
+ if (separators.size() == 1) { |
+ return SplitStringT<std::string, std::string, char>( |
+ input, separators[0], whitespace, result_type); |
} |
- if (!last_was_ws) { |
- result->push_back( |
- str.substr(last_non_ws_start, length - last_non_ws_start)); |
+ return SplitStringT<std::string, std::string, StringPiece>( |
+ input, separators, whitespace, result_type); |
+} |
+ |
+std::vector<string16> SplitString(StringPiece16 input, |
+ StringPiece16 separators, |
+ WhitespaceHandling whitespace, |
+ SplitResult result_type) { |
+ if (separators.size() == 1) { |
+ return SplitStringT<string16, string16, char16>(input, separators[0], |
+ whitespace, result_type); |
} |
+ return SplitStringT<string16, string16, StringPiece16>( |
+ input, separators, whitespace, result_type); |
} |
-} // namespace |
+std::vector<StringPiece> SplitStringPiece(StringPiece input, |
+ StringPiece separators, |
+ WhitespaceHandling whitespace, |
+ SplitResult result_type) { |
+ if (separators.size() == 1) { |
+ return SplitStringT<std::string, StringPiece, char>( |
+ input, separators[0], whitespace, result_type); |
+ } |
+ return SplitStringT<std::string, StringPiece, StringPiece>( |
+ input, separators, whitespace, result_type); |
+} |
-void SplitString(const string16& str, |
- char16 c, |
- std::vector<string16>* r) { |
+std::vector<StringPiece16> SplitStringPiece(StringPiece16 input, |
+ StringPiece16 separators, |
+ WhitespaceHandling whitespace, |
+ SplitResult result_type) { |
+ if (separators.size() == 1) { |
+ return SplitStringT<string16, StringPiece16, char16>( |
+ input, separators[0], whitespace, result_type); |
+ } |
+ return SplitStringT<string16, StringPiece16, StringPiece16>( |
+ input, separators, whitespace, result_type); |
+} |
+ |
+void SplitString(const string16& str, char16 c, std::vector<string16>* result) { |
DCHECK(CBU16_IS_SINGLE(c)); |
- SplitStringT(str, c, true, r); |
+ *result = SplitStringT<string16, string16, char16>(str, c, TRIM_WHITESPACE, |
+ SPLIT_WANT_ALL); |
+ |
+ // Backward-compat hack: The old SplitString implementation would keep |
+ // empty substrings, for example: |
+ // "a,,b" -> ["a", "", "b"] |
+ // "a, ,b" -> ["a", "", "b"] |
+ // which the current code also does. But the old one would discard an empty |
+ // substring when it was the only result: |
+ // " " -> [] |
+ // In the latter case, the new code would give [""], so clear it below. |
+ if (result->size() == 1 && (*result)[0].empty()) |
+ result->clear(); |
} |
void SplitString(const std::string& str, |
char c, |
- std::vector<std::string>* r) { |
+ std::vector<std::string>* result) { |
#if CHAR_MIN < 0 |
DCHECK_GE(c, 0); |
#endif |
DCHECK_LT(c, 0x7F); |
- SplitStringT(str, c, true, r); |
+ *result = SplitStringT<std::string, std::string, char>( |
+ str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL); |
+ |
+ // Backward-compat hack, see above. |
+ if (result->size() == 1 && (*result)[0].empty()) |
+ result->clear(); |
} |
bool SplitStringIntoKeyValuePairs(const std::string& line, |
@@ -182,31 +271,36 @@ void SplitStringUsingSubstr(const std::string& str, |
SplitStringUsingSubstrT(str, s, r); |
} |
-void SplitStringDontTrim(const string16& str, |
+void SplitStringDontTrim(StringPiece16 str, |
char16 c, |
- std::vector<string16>* r) { |
+ std::vector<string16>* result) { |
DCHECK(CBU16_IS_SINGLE(c)); |
- SplitStringT(str, c, false, r); |
+ *result = SplitStringT<string16, string16, char16>(str, c, KEEP_WHITESPACE, |
+ SPLIT_WANT_ALL); |
} |
-void SplitStringDontTrim(const std::string& str, |
+void SplitStringDontTrim(StringPiece str, |
char c, |
- std::vector<std::string>* r) { |
+ std::vector<std::string>* result) { |
#if CHAR_MIN < 0 |
DCHECK_GE(c, 0); |
#endif |
DCHECK_LT(c, 0x7F); |
- SplitStringT(str, c, false, r); |
+ *result = SplitStringT<std::string, std::string, char>( |
+ str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL); |
} |
void SplitStringAlongWhitespace(const string16& str, |
std::vector<string16>* result) { |
- SplitStringAlongWhitespaceT(str, result); |
+ *result = SplitStringT<string16, string16, StringPiece16>( |
+ str, StringPiece16(kWhitespaceASCIIAs16), TRIM_WHITESPACE, |
+ SPLIT_WANT_NONEMPTY); |
} |
void SplitStringAlongWhitespace(const std::string& str, |
std::vector<std::string>* result) { |
- SplitStringAlongWhitespaceT(str, result); |
+ *result = SplitStringT<std::string, std::string, StringPiece>( |
+ str, StringPiece(kWhitespaceASCII), TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY); |
} |
} // namespace base |