| Index: base/strings/string_split.cc
|
| diff --git a/base/strings/string_split.cc b/base/strings/string_split.cc
|
| index 88a623664fcc1c256de60c110c3186ecc0034885..e23ce3fa039a087b3cd902aab55a18520599d9ae 100644
|
| --- a/base/strings/string_split.cc
|
| +++ b/base/strings/string_split.cc
|
| @@ -12,26 +12,91 @@ namespace base {
|
|
|
| namespace {
|
|
|
| -template <typename STR>
|
| -void SplitStringT(const STR& str,
|
| - const typename STR::value_type s,
|
| - bool trim_whitespace,
|
| - std::vector<STR>* r) {
|
| - r->clear();
|
| - size_t last = 0;
|
| - size_t c = str.size();
|
| - for (size_t i = 0; i <= c; ++i) {
|
| - if (i == c || str[i] == s) {
|
| - STR tmp(str, last, i - last);
|
| - if (trim_whitespace)
|
| - TrimWhitespace(tmp, TRIM_ALL, &tmp);
|
| - // Avoid converting an empty or all-whitespace source string into a vector
|
| - // of one empty string.
|
| - if (i != c || !r->empty() || !tmp.empty())
|
| - r->push_back(tmp);
|
| - last = i + 1;
|
| +// PieceToOutputType converts a StringPiece as needed to a given output type,
|
| +// which is either the same type of StringPiece (a NOP) or the corresponding
|
| +// non-piece string type.
|
| +//
|
| +// The default converter is a NOP; it works when the OutputType is the
|
| +// correct StringPiece.
|
| +template<typename Str, typename OutputType>
|
| +OutputType PieceToOutputType(BasicStringPiece<Str> piece) {
|
| + return piece;
|
| +}
|
| +template<> // Convert StringPiece to std::string
|
| +std::string PieceToOutputType<std::string, std::string>(StringPiece piece) {
|
| + return piece.as_string();
|
| +}
|
| +template<> // Convert StringPiece16 to string16.
|
| +string16 PieceToOutputType<string16, string16>(StringPiece16 piece) {
|
| + return piece.as_string();
|
| +}
|
| +
|
| +// Returns either the ASCII or UTF-16 whitespace.
|
| +template<typename Str> BasicStringPiece<Str> WhitespaceForType();
|
| +template<> StringPiece16 WhitespaceForType<string16>() {
|
| + return kWhitespaceUTF16;
|
| +}
|
| +template<> StringPiece WhitespaceForType<std::string>() {
|
| + return kWhitespaceASCII;
|
| +}
|
| +
|
| +// The single-character case calls find() instead of find_first_of(), since
|
| +// this is the common case and can be made faster. This could have been
|
| +// done with template specialization too, but would have been less clear.
|
| +//
|
| +// There is no corresponding FindFirstNotOf because StringPiece already
|
| +// implements these different versions that do the optimized searching.
|
| +size_t FindFirstOf(StringPiece piece, char c, size_t pos) {
|
| + return piece.find(c, pos);
|
| +}
|
| +size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) {
|
| + return piece.find(c, pos);
|
| +}
|
| +size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) {
|
| + return piece.find_first_of(one_of, pos);
|
| +}
|
| +size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) {
|
| + return piece.find_first_of(one_of, pos);
|
| +}
|
| +
|
| +// General string splitter template. Can take 8- or 16-bit input, can produce
|
| +// the corresponding string or StringPiece output, and can take single- or
|
| +// multiple-character delimiters.
|
| +//
|
| +// DelimiterType is either a character (Str::value_type) or a string piece of
|
| +// multiple characters (BasicStringPiece<Str>). StringPiece has a version of
|
| +// find for both of these cases, and the single-character version is the most
|
| +// common and can be implemented faster, which is why this is a template.
|
| +template<typename Str, typename OutputStringType, typename DelimiterType>
|
| +static std::vector<OutputStringType> SplitStringT(
|
| + BasicStringPiece<Str> str,
|
| + DelimiterType delimiter,
|
| + WhitespaceHandling whitespace,
|
| + SplitResult result_type) {
|
| + std::vector<OutputStringType> result;
|
| + if (str.empty())
|
| + return result;
|
| +
|
| + size_t start = 0;
|
| + while (start != Str::npos) {
|
| + size_t end = FindFirstOf(str, delimiter, start);
|
| +
|
| + BasicStringPiece<Str> piece;
|
| + if (end == Str::npos) {
|
| + piece = str.substr(start);
|
| + start = Str::npos;
|
| + } else {
|
| + piece = str.substr(start, end - start);
|
| + start = end + 1;
|
| }
|
| +
|
| + if (whitespace == TRIM_WHITESPACE)
|
| + piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL);
|
| +
|
| + if (result_type == SPLIT_WANT_ALL || !piece.empty())
|
| + result.push_back(PieceToOutputType<Str, OutputStringType>(piece));
|
| }
|
| + return result;
|
| }
|
|
|
| bool SplitStringIntoKeyValue(const std::string& line,
|
| @@ -62,8 +127,8 @@ bool SplitStringIntoKeyValue(const std::string& line,
|
|
|
| template <typename STR>
|
| void SplitStringUsingSubstrT(const STR& str,
|
| - const STR& s,
|
| - std::vector<STR>* r) {
|
| + const STR& s,
|
| + std::vector<STR>* r) {
|
| r->clear();
|
| typename STR::size_type begin_index = 0;
|
| while (true) {
|
| @@ -83,64 +148,89 @@ void SplitStringUsingSubstrT(const STR& str,
|
| }
|
| }
|
|
|
| -template<typename STR>
|
| -void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
|
| - result->clear();
|
| - const size_t length = str.length();
|
| - if (!length)
|
| - return;
|
| -
|
| - bool last_was_ws = false;
|
| - size_t last_non_ws_start = 0;
|
| - for (size_t i = 0; i < length; ++i) {
|
| - switch (str[i]) {
|
| - // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR.
|
| - case L' ':
|
| - case L'\t':
|
| - case L'\xA':
|
| - case L'\xB':
|
| - case L'\xC':
|
| - case L'\xD':
|
| - if (!last_was_ws) {
|
| - if (i > 0) {
|
| - result->push_back(
|
| - str.substr(last_non_ws_start, i - last_non_ws_start));
|
| - }
|
| - last_was_ws = true;
|
| - }
|
| - break;
|
| -
|
| - default: // Not a space character.
|
| - if (last_was_ws) {
|
| - last_was_ws = false;
|
| - last_non_ws_start = i;
|
| - }
|
| - break;
|
| - }
|
| +} // namespace
|
| +
|
| +std::vector<std::string> SplitString(StringPiece input,
|
| + StringPiece separators,
|
| + WhitespaceHandling whitespace,
|
| + SplitResult result_type) {
|
| + if (separators.size() == 1) {
|
| + return SplitStringT<std::string, std::string, char>(
|
| + input, separators[0], whitespace, result_type);
|
| }
|
| - if (!last_was_ws) {
|
| - result->push_back(
|
| - str.substr(last_non_ws_start, length - last_non_ws_start));
|
| + return SplitStringT<std::string, std::string, StringPiece>(
|
| + input, separators, whitespace, result_type);
|
| +}
|
| +
|
| +std::vector<string16> SplitString(StringPiece16 input,
|
| + StringPiece16 separators,
|
| + WhitespaceHandling whitespace,
|
| + SplitResult result_type) {
|
| + if (separators.size() == 1) {
|
| + return SplitStringT<string16, string16, char16>(
|
| + input, separators[0], whitespace, result_type);
|
| }
|
| + return SplitStringT<string16, string16, StringPiece16>(
|
| + input, separators, whitespace, result_type);
|
| }
|
|
|
| -} // namespace
|
| +std::vector<StringPiece> SplitStringPiece(StringPiece input,
|
| + StringPiece separators,
|
| + WhitespaceHandling whitespace,
|
| + SplitResult result_type) {
|
| + if (separators.size() == 1) {
|
| + return SplitStringT<std::string, StringPiece, char>(
|
| + input, separators[0], whitespace, result_type);
|
| + }
|
| + return SplitStringT<std::string, StringPiece, StringPiece>(
|
| + input, separators, whitespace, result_type);
|
| +}
|
| +
|
| +std::vector<StringPiece16> SplitStringPiece(StringPiece16 input,
|
| + StringPiece16 separators,
|
| + WhitespaceHandling whitespace,
|
| + SplitResult result_type) {
|
| + if (separators.size() == 1) {
|
| + return SplitStringT<string16, StringPiece16, char16>(
|
| + input, separators[0], whitespace, result_type);
|
| + }
|
| + return SplitStringT<string16, StringPiece16, StringPiece16>(
|
| + input, separators, whitespace, result_type);
|
| +}
|
|
|
| void SplitString(const string16& str,
|
| char16 c,
|
| - std::vector<string16>* r) {
|
| + std::vector<string16>* result) {
|
| DCHECK(CBU16_IS_SINGLE(c));
|
| - SplitStringT(str, c, true, r);
|
| + *result = SplitStringT<string16, string16, char16>(
|
| + str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL);
|
| +
|
| + // Backward-compat hack: The old SplitString implementation would keep
|
| + // empty substrings, for example:
|
| + // "a,,b" -> ["a", "", "b"]
|
| + // "a, ,b" -> ["a", "", "b"]
|
| + // which the current code also does. But the old one would discard them when
|
| + // the only result was that empty string:
|
| + // " " -> []
|
| + // In the latter case, our new code will give [""].
|
| + if (result->size() == 1 && (*result)[0].empty())
|
| + result->clear();
|
| }
|
|
|
| void SplitString(const std::string& str,
|
| char c,
|
| - std::vector<std::string>* r) {
|
| + std::vector<std::string>* result) {
|
| #if CHAR_MIN < 0
|
| DCHECK_GE(c, 0);
|
| #endif
|
| DCHECK_LT(c, 0x7F);
|
| - SplitStringT(str, c, true, r);
|
| + *result = SplitStringT<std::string, std::string, char>(
|
| + str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL);
|
| +
|
| + // Backward-compat hack; see the string16 SplitString overload above.
|
| + if (result->size() == 1 && (*result)[0].empty())
|
| + result->clear();
|
| +
|
| }
|
|
|
| bool SplitStringIntoKeyValuePairs(const std::string& line,
|
| @@ -182,31 +272,37 @@ void SplitStringUsingSubstr(const std::string& str,
|
| SplitStringUsingSubstrT(str, s, r);
|
| }
|
|
|
| -void SplitStringDontTrim(const string16& str,
|
| +void SplitStringDontTrim(StringPiece16 str,
|
| char16 c,
|
| - std::vector<string16>* r) {
|
| + std::vector<string16>* result) {
|
| DCHECK(CBU16_IS_SINGLE(c));
|
| - SplitStringT(str, c, false, r);
|
| + *result = SplitStringT<string16, string16, char16>(
|
| + str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL);
|
| }
|
|
|
| -void SplitStringDontTrim(const std::string& str,
|
| +void SplitStringDontTrim(StringPiece str,
|
| char c,
|
| - std::vector<std::string>* r) {
|
| + std::vector<std::string>* result) {
|
| #if CHAR_MIN < 0
|
| DCHECK_GE(c, 0);
|
| #endif
|
| DCHECK_LT(c, 0x7F);
|
| - SplitStringT(str, c, false, r);
|
| + *result = SplitStringT<std::string, std::string, char>(
|
| + str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL);
|
| }
|
|
|
| void SplitStringAlongWhitespace(const string16& str,
|
| std::vector<string16>* result) {
|
| - SplitStringAlongWhitespaceT(str, result);
|
| + *result = SplitStringT<string16, string16, StringPiece16>(
|
| + str, StringPiece16(kWhitespaceASCIIAs16),
|
| + TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
|
| }
|
|
|
| void SplitStringAlongWhitespace(const std::string& str,
|
| std::vector<std::string>* result) {
|
| - SplitStringAlongWhitespaceT(str, result);
|
| + *result = SplitStringT<std::string, std::string, StringPiece>(
|
| + str, StringPiece(kWhitespaceASCII),
|
| + TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
|
| }
|
|
|
| } // namespace base
|
|
|