Chromium Code Reviews| Index: base/strings/string_split.cc |
| diff --git a/base/strings/string_split.cc b/base/strings/string_split.cc |
| index 88a623664fcc1c256de60c110c3186ecc0034885..3b1311ea2cc14ea7ebe82c6825d26b88e5200187 100644 |
| --- a/base/strings/string_split.cc |
| +++ b/base/strings/string_split.cc |
| @@ -12,26 +12,91 @@ namespace base { |
| namespace { |
| -template <typename STR> |
| -void SplitStringT(const STR& str, |
| - const typename STR::value_type s, |
| - bool trim_whitespace, |
| - std::vector<STR>* r) { |
| - r->clear(); |
| - size_t last = 0; |
| - size_t c = str.size(); |
| - for (size_t i = 0; i <= c; ++i) { |
| - if (i == c || str[i] == s) { |
| - STR tmp(str, last, i - last); |
| - if (trim_whitespace) |
| - TrimWhitespace(tmp, TRIM_ALL, &tmp); |
| - // Avoid converting an empty or all-whitespace source string into a vector |
| - // of one empty string. |
| - if (i != c || !r->empty() || !tmp.empty()) |
| - r->push_back(tmp); |
| - last = i + 1; |
| +// PieceToOutputType converts a StringPiece as needed to a given output type, |
| +// which is either the same type of StringPiece (a NOP) or the corresponding |
| +// non-piece string type. |
| +// |
| +// The default converter is a NOP; it works when the OutputType is the |
| +// correct StringPiece. |
| +template<typename Str, typename OutputType> |
| +OutputType PieceToOutputType(BasicStringPiece<Str> piece) { |
| + return piece; |
| +} |
| +template<> // Convert StringPiece to std::string |
| +std::string PieceToOutputType<std::string, std::string>(StringPiece piece) { |
| + return piece.as_string(); |
| +} |
| +template<> // Convert StringPiece16 to string16. |
| +string16 PieceToOutputType<string16, string16>(StringPiece16 piece) { |
| + return piece.as_string(); |
| +} |
| + |
| +// Returns either the ASCII or UTF-16 whitespace. |
| +template<typename Str> BasicStringPiece<Str> WhitespaceForType(); |
| +template<> StringPiece16 WhitespaceForType<string16>() { |
| + return kWhitespaceUTF16; |
| +} |
| +template<> StringPiece WhitespaceForType<std::string>() { |
| + return kWhitespaceASCII; |
| +} |
| + |
| +// Optimize the single-character case to call find() on the string instead, |
| +// since this is the common case and can be made faster. This could have been |
| +// done with template specialization too, but would have been less clear. |
| +// |
| +// There is no corresponding FindFirstNotOf because StringPiece already |
| +// implements these different versions that do the optimized searching. |
| +size_t FindFirstOf(StringPiece piece, char c, size_t pos) { |
| + return piece.find(c, pos); |
| +} |
| +size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) { |
| + return piece.find(c, pos); |
| +} |
| +size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) { |
| + return piece.find_first_of(one_of, pos); |
| +} |
| +size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) { |
| + return piece.find_first_of(one_of, pos); |
| +} |
| + |
| +// General string splitter template. Can take 8- or 16-bit input, can produce |
| +// the corresponding string or StringPiece output, and can take single- or |
| +// multiple-character delimiters. |
| +// |
| +// DelimiterType is either a character (Str::value_type) or a string piece of |
|
danakj
2015/06/11 23:58:34
typo here with value_type>
|
| +// multiple characters (BasicStringPiece<Str>). StringPiece has a version of |
|
danakj
2015/06/11 23:58:34
has a version
|
| +// find for both of these cases, and the single-character version is the most |
| +// common and can be implemented faster, which is why this is a template. |
| +template<typename Str, typename OutputStringType, typename DelimiterType> |
| +static std::vector<OutputStringType> SplitStringT( |
| + BasicStringPiece<Str> str, |
| + DelimiterType delimiter, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type) { |
| + std::vector<OutputStringType> result; |
| + if (str.empty()) |
| + return result; |
| + |
| + size_t start = 0; |
| + while (start != Str::npos) { |
| + size_t end = FindFirstOf(str, delimiter, start); |
| + |
| + BasicStringPiece<Str> piece; |
| + if (end == Str::npos) { |
| + piece = str.substr(start); |
| + start = Str::npos; |
| + } else { |
| + piece = str.substr(start, end - start); |
| + start = end + 1; |
| } |
| + |
| + if (whitespace == TRIM_WHITESPACE) |
| + piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); |
| + |
| + if (result_type == SPLIT_WANT_ALL || !piece.empty()) |
| + result.push_back(PieceToOutputType<Str, OutputStringType>(piece)); |
| } |
| + return result; |
| } |
| bool SplitStringIntoKeyValue(const std::string& line, |
| @@ -62,8 +127,8 @@ bool SplitStringIntoKeyValue(const std::string& line, |
| template <typename STR> |
| void SplitStringUsingSubstrT(const STR& str, |
| - const STR& s, |
| - std::vector<STR>* r) { |
| + const STR& s, |
| + std::vector<STR>* r) { |
| r->clear(); |
| typename STR::size_type begin_index = 0; |
| while (true) { |
| @@ -83,64 +148,89 @@ void SplitStringUsingSubstrT(const STR& str, |
| } |
| } |
| -template<typename STR> |
| -void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) { |
| - result->clear(); |
| - const size_t length = str.length(); |
| - if (!length) |
| - return; |
| - |
| - bool last_was_ws = false; |
| - size_t last_non_ws_start = 0; |
| - for (size_t i = 0; i < length; ++i) { |
| - switch (str[i]) { |
| - // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR. |
| - case L' ': |
| - case L'\t': |
| - case L'\xA': |
| - case L'\xB': |
| - case L'\xC': |
| - case L'\xD': |
| - if (!last_was_ws) { |
| - if (i > 0) { |
| - result->push_back( |
| - str.substr(last_non_ws_start, i - last_non_ws_start)); |
| - } |
| - last_was_ws = true; |
| - } |
| - break; |
| - |
| - default: // Not a space character. |
| - if (last_was_ws) { |
| - last_was_ws = false; |
| - last_non_ws_start = i; |
| - } |
| - break; |
| - } |
| +} // namespace |
| + |
| +std::vector<std::string> SplitString(StringPiece input, |
| + StringPiece separators, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type) { |
| + if (separators.size() == 1) { |
| + return SplitStringT<std::string, std::string, char>( |
| + input, separators[0], whitespace, result_type); |
| } |
| - if (!last_was_ws) { |
| - result->push_back( |
| - str.substr(last_non_ws_start, length - last_non_ws_start)); |
| + return SplitStringT<std::string, std::string, StringPiece>( |
| + input, separators, whitespace, result_type); |
| +} |
| + |
| +std::vector<string16> SplitString(StringPiece16 input, |
| + StringPiece16 separators, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type) { |
| + if (separators.size() == 1) { |
| + return SplitStringT<string16, string16, char16>( |
| + input, separators[0], whitespace, result_type); |
| } |
| + return SplitStringT<string16, string16, StringPiece16>( |
| + input, separators, whitespace, result_type); |
| } |
| -} // namespace |
| +std::vector<StringPiece> SplitStringPiece(StringPiece input, |
| + StringPiece separators, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type) { |
| + if (separators.size() == 1) { |
| + return SplitStringT<std::string, StringPiece, char>( |
| + input, separators[0], whitespace, result_type); |
| + } |
| + return SplitStringT<std::string, StringPiece, StringPiece>( |
| + input, separators, whitespace, result_type); |
| +} |
| + |
| +std::vector<StringPiece16> SplitStringPiece(StringPiece16 input, |
| + StringPiece16 separators, |
| + WhitespaceHandling whitespace, |
| + SplitResult result_type) { |
| + if (separators.size() == 1) { |
| + return SplitStringT<string16, StringPiece16, char16>( |
| + input, separators[0], whitespace, result_type); |
| + } |
| + return SplitStringT<string16, StringPiece16, StringPiece16>( |
| + input, separators, whitespace, result_type); |
| +} |
| void SplitString(const string16& str, |
| char16 c, |
| - std::vector<string16>* r) { |
| + std::vector<string16>* result) { |
| DCHECK(CBU16_IS_SINGLE(c)); |
| - SplitStringT(str, c, true, r); |
| + *result = SplitStringT<string16, string16, char16>( |
| + str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL); |
| + |
| + // Backward-compat hack: The old SplitString implementation would keep |
| + // empty substrings, for example: |
| + // "a,,b" -> ["a", "", "b"] |
| + // "a, ,b" -> ["a", "", "b"] |
| + // which the current code also does. But the old one would discard them when |
| + // the only result was that empty string: |
| + // " " -> [] |
| + // In the latter case, our new code will give [""] |
| + if (result->size() == 1 && (*result)[0].empty()) |
| + result->clear(); |
| } |
| void SplitString(const std::string& str, |
| char c, |
| - std::vector<std::string>* r) { |
| + std::vector<std::string>* result) { |
| #if CHAR_MIN < 0 |
| DCHECK_GE(c, 0); |
| #endif |
| DCHECK_LT(c, 0x7F); |
| - SplitStringT(str, c, true, r); |
| + *result = SplitStringT<std::string, std::string, char>( |
| + str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL); |
| + |
| + // Backward-compat hack, see above. |
| + if (result->size() == 1 && (*result)[0].empty()) |
| + result->clear(); |
| + |
| } |
| bool SplitStringIntoKeyValuePairs(const std::string& line, |
| @@ -182,31 +272,37 @@ void SplitStringUsingSubstr(const std::string& str, |
| SplitStringUsingSubstrT(str, s, r); |
| } |
| -void SplitStringDontTrim(const string16& str, |
| +void SplitStringDontTrim(StringPiece16 str, |
| char16 c, |
| - std::vector<string16>* r) { |
| + std::vector<string16>* result) { |
| DCHECK(CBU16_IS_SINGLE(c)); |
| - SplitStringT(str, c, false, r); |
| + *result = SplitStringT<string16, string16, char16>( |
| + str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL); |
| } |
| -void SplitStringDontTrim(const std::string& str, |
| +void SplitStringDontTrim(StringPiece str, |
| char c, |
| - std::vector<std::string>* r) { |
| + std::vector<std::string>* result) { |
| #if CHAR_MIN < 0 |
| DCHECK_GE(c, 0); |
| #endif |
| DCHECK_LT(c, 0x7F); |
| - SplitStringT(str, c, false, r); |
| + *result = SplitStringT<std::string, std::string, char>( |
| + str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL); |
| } |
| void SplitStringAlongWhitespace(const string16& str, |
| std::vector<string16>* result) { |
| - SplitStringAlongWhitespaceT(str, result); |
| + *result = SplitStringT<string16, string16, StringPiece16>( |
| + str, StringPiece16(kWhitespaceASCIIAs16), |
| + TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY); |
| } |
| void SplitStringAlongWhitespace(const std::string& str, |
| std::vector<std::string>* result) { |
| - SplitStringAlongWhitespaceT(str, result); |
| + *result = SplitStringT<std::string, std::string, StringPiece>( |
| + str, StringPiece(kWhitespaceASCII), |
| + TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY); |
| } |
| } // namespace base |