base/strings/string_split.cc - Issue 1169393003: Add new SplitString backend.

Unified Diff: base/strings/string_split.cc

Issue 1169393003: Add new SplitString backend. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: base/strings/string_split.cc

diff --git a/base/strings/string_split.cc b/base/strings/string_split.cc

index 88a623664fcc1c256de60c110c3186ecc0034885..e23ce3fa039a087b3cd902aab55a18520599d9ae 100644

--- a/base/strings/string_split.cc

+++ b/base/strings/string_split.cc

@@ -12,26 +12,91 @@ namespace base {

namespace {

-template <typename STR>

-void SplitStringT(const STR& str,

- const typename STR::value_type s,

- bool trim_whitespace,

- std::vector<STR>* r) {

- r->clear();

- size_t last = 0;

- size_t c = str.size();

- for (size_t i = 0; i <= c; ++i) {

- if (i == c || str[i] == s) {

- STR tmp(str, last, i - last);

- if (trim_whitespace)

- TrimWhitespace(tmp, TRIM_ALL, &tmp);

- // Avoid converting an empty or all-whitespace source string into a vector

- // of one empty string.

- if (i != c || !r->empty() || !tmp.empty())

- r->push_back(tmp);

- last = i + 1;

+// PieceToOutputType converts a StringPiece as needed to a given output type,

+// which is either the same type of StringPiece (a NOP) or the corresponding

+// non-piece string type.

+//

+// The default converter is a NOP: it applies when OutputType is already the

+// matching StringPiece type, so the piece is returned unchanged.

+template<typename Str, typename OutputType>

+OutputType PieceToOutputType(BasicStringPiece<Str> piece) {

+ return piece;

+template<> // Convert StringPiece to std::string

+std::string PieceToOutputType<std::string, std::string>(StringPiece piece) {

+ return piece.as_string();

+template<> // Convert StringPiece16 to string16.

+string16 PieceToOutputType<string16, string16>(StringPiece16 piece) {

+ return piece.as_string();

+// Returns the whitespace character set for the given string type: ASCII

+// whitespace for std::string, UTF-16 whitespace for string16.

+template<typename Str> BasicStringPiece<Str> WhitespaceForType();

+template<> StringPiece16 WhitespaceForType<string16>() {

+ return kWhitespaceUTF16;

+template<> StringPiece WhitespaceForType<std::string>() {

+ return kWhitespaceASCII;

+// FindFirstOf is overloaded on the delimiter type. The single-character

+// overloads call find() rather than find_first_of(), since a single delimiter

+// is the common case and can be searched for faster. This could have been

+// done with template specialization too, but would have been less clear.

+//

+// There is no corresponding FindFirstNotOf because StringPiece already

+// implements these different versions that do the optimized searching.

+size_t FindFirstOf(StringPiece piece, char c, size_t pos) {

+ return piece.find(c, pos);

+size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) {

+ return piece.find(c, pos);

+size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) {

+ return piece.find_first_of(one_of, pos);

+size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) {

+ return piece.find_first_of(one_of, pos);

+// General string splitter template. Can take 8- or 16-bit input, can produce

+// the corresponding string or StringPiece output, and can take single- or

+// multiple-character delimiters.

+//

+// DelimiterType is either a character (Str::value_type) or a string piece of

+// multiple characters (BasicStringPiece<Str>). StringPiece has a version of

+// find for both of these cases, and the single-character version is the most

+// common and can be implemented faster, which is why this is a template.

+template<typename Str, typename OutputStringType, typename DelimiterType>

+static std::vector<OutputStringType> SplitStringT(

+ BasicStringPiece<Str> str,

+ DelimiterType delimiter,

+ WhitespaceHandling whitespace,

+ SplitResult result_type) {

+ std::vector<OutputStringType> result;

+ if (str.empty())

+ return result;

+ size_t start = 0;

+ while (start != Str::npos) {

+ size_t end = FindFirstOf(str, delimiter, start);

+ BasicStringPiece<Str> piece;

+ if (end == Str::npos) {

+ piece = str.substr(start);

+ start = Str::npos;

+ } else {

+ piece = str.substr(start, end - start);

+ start = end + 1;

}

+ if (whitespace == TRIM_WHITESPACE)

+ piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL);

+ if (result_type == SPLIT_WANT_ALL || !piece.empty())

+ result.push_back(PieceToOutputType<Str, OutputStringType>(piece));

}

+ return result;

}

bool SplitStringIntoKeyValue(const std::string& line,

@@ -62,8 +127,8 @@ bool SplitStringIntoKeyValue(const std::string& line,

template <typename STR>

void SplitStringUsingSubstrT(const STR& str,

- const STR& s,

- std::vector<STR>* r) {

+ const STR& s,

+ std::vector<STR>* r) {

r->clear();

typename STR::size_type begin_index = 0;

while (true) {

@@ -83,64 +148,89 @@ void SplitStringUsingSubstrT(const STR& str,

}

-template<typename STR>

-void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {

- result->clear();

- const size_t length = str.length();

- if (!length)

- return;

- bool last_was_ws = false;

- size_t last_non_ws_start = 0;

- for (size_t i = 0; i < length; ++i) {

- switch (str[i]) {

- // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR.

- case L' ':

- case L'\t':

- case L'\xA':

- case L'\xB':

- case L'\xC':

- case L'\xD':

- if (!last_was_ws) {

- if (i > 0) {

- result->push_back(

- str.substr(last_non_ws_start, i - last_non_ws_start));

- }

- last_was_ws = true;

- }

- break;

- default: // Not a space character.

- if (last_was_ws) {

- last_was_ws = false;

- last_non_ws_start = i;

- }

- break;

- }

+} // namespace

+std::vector<std::string> SplitString(StringPiece input,

+ StringPiece separators,

+ WhitespaceHandling whitespace,

+ SplitResult result_type) {

+ if (separators.size() == 1) {

+ return SplitStringT<std::string, std::string, char>(

+ input, separators[0], whitespace, result_type);

}

- if (!last_was_ws) {

- result->push_back(

- str.substr(last_non_ws_start, length - last_non_ws_start));

+ return SplitStringT<std::string, std::string, StringPiece>(

+ input, separators, whitespace, result_type);

+std::vector<string16> SplitString(StringPiece16 input,

+ StringPiece16 separators,

+ WhitespaceHandling whitespace,

+ SplitResult result_type) {

+ if (separators.size() == 1) {

+ return SplitStringT<string16, string16, char16>(

+ input, separators[0], whitespace, result_type);

}

+ return SplitStringT<string16, string16, StringPiece16>(

+ input, separators, whitespace, result_type);

}

-} // namespace

+std::vector<StringPiece> SplitStringPiece(StringPiece input,

+ StringPiece separators,

+ WhitespaceHandling whitespace,

+ SplitResult result_type) {

+ if (separators.size() == 1) {

+ return SplitStringT<std::string, StringPiece, char>(

+ input, separators[0], whitespace, result_type);

+ }

+ return SplitStringT<std::string, StringPiece, StringPiece>(

+ input, separators, whitespace, result_type);

+std::vector<StringPiece16> SplitStringPiece(StringPiece16 input,

+ StringPiece16 separators,

+ WhitespaceHandling whitespace,

+ SplitResult result_type) {

+ if (separators.size() == 1) {

+ return SplitStringT<string16, StringPiece16, char16>(

+ input, separators[0], whitespace, result_type);

+ }

+ return SplitStringT<string16, StringPiece16, StringPiece16>(

+ input, separators, whitespace, result_type);

void SplitString(const string16& str,

char16 c,

- std::vector<string16>* r) {

+ std::vector<string16>* result) {

DCHECK(CBU16_IS_SINGLE(c));

- SplitStringT(str, c, true, r);

+ *result = SplitStringT<string16, string16, char16>(

+ str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL);

+ // Backward-compat hack: The old SplitString implementation would keep

+ // empty substrings, for example:

+ // "a,,b" -> ["a", "", "b"]

+ // "a, ,b" -> ["a", "", "b"]

+ // which the new code also does. But the old one would return an empty

+ // vector when the only result was a single empty string:

+ // " " -> []

+ // For that input the new code gives [""], so clear the result to match the

+ // old behavior.

+ if (result->size() == 1 && (*result)[0].empty())

+ result->clear();

}

void SplitString(const std::string& str,

char c,

- std::vector<std::string>* r) {

+ std::vector<std::string>* result) {

#if CHAR_MIN < 0

DCHECK_GE(c, 0);

#endif

DCHECK_LT(c, 0x7F);

- SplitStringT(str, c, true, r);

+ *result = SplitStringT<std::string, std::string, char>(

+ str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL);

+ // Backward-compat hack, see above.

+ if (result->size() == 1 && (*result)[0].empty())

+ result->clear();

}

bool SplitStringIntoKeyValuePairs(const std::string& line,

@@ -182,31 +272,37 @@ void SplitStringUsingSubstr(const std::string& str,

SplitStringUsingSubstrT(str, s, r);

}

-void SplitStringDontTrim(const string16& str,

+void SplitStringDontTrim(StringPiece16 str,

char16 c,

- std::vector<string16>* r) {

+ std::vector<string16>* result) {

DCHECK(CBU16_IS_SINGLE(c));

- SplitStringT(str, c, false, r);

+ *result = SplitStringT<string16, string16, char16>(

+ str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL);

}

-void SplitStringDontTrim(const std::string& str,

+void SplitStringDontTrim(StringPiece str,

char c,

- std::vector<std::string>* r) {

+ std::vector<std::string>* result) {

#if CHAR_MIN < 0

DCHECK_GE(c, 0);

#endif

DCHECK_LT(c, 0x7F);

- SplitStringT(str, c, false, r);

+ *result = SplitStringT<std::string, std::string, char>(

+ str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL);

}

void SplitStringAlongWhitespace(const string16& str,

std::vector<string16>* result) {

- SplitStringAlongWhitespaceT(str, result);

+ *result = SplitStringT<string16, string16, StringPiece16>(

+ str, StringPiece16(kWhitespaceASCIIAs16),

+ TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);

}

void SplitStringAlongWhitespace(const std::string& str,

std::vector<std::string>* result) {

- SplitStringAlongWhitespaceT(str, result);

+ *result = SplitStringT<std::string, std::string, StringPiece>(

+ str, StringPiece(kWhitespaceASCII),

+ TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);

}

} // namespace base

« no previous file with comments | « base/strings/string_split.h ('k') | base/strings/string_split_unittest.cc » ('j') | no next file with comments »