OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/strings/string_split.h" | 5 #include "base/strings/string_split.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "base/strings/string_util.h" | 8 #include "base/strings/string_util.h" |
9 #include "base/third_party/icu/icu_utf.h" | 9 #include "base/third_party/icu/icu_utf.h" |
10 | 10 |
11 namespace base { | 11 namespace base { |
12 | 12 |
13 namespace { | 13 namespace { |
14 | 14 |
15 template <typename STR> | 15 // PieceToOutputType converts a StringPiece as needed to a given output type, |
16 void SplitStringT(const STR& str, | 16 // which is either the same type of StringPiece (a NOP) or the corresponding |
17 const typename STR::value_type s, | 17 // non-piece string type. |
18 bool trim_whitespace, | 18 // |
19 std::vector<STR>* r) { | 19 // The default converter is a NOP, it works when the OutputType is the |
20 r->clear(); | 20 // correct StringPiece. |
21 size_t last = 0; | 21 template <typename Str, typename OutputType> |
22 size_t c = str.size(); | 22 OutputType PieceToOutputType(BasicStringPiece<Str> piece) { |
23 for (size_t i = 0; i <= c; ++i) { | 23 return piece; |
24 if (i == c || str[i] == s) { | 24 } |
25 STR tmp(str, last, i - last); | 25 template <> // Convert StringPiece to std::string |
26 if (trim_whitespace) | 26 std::string PieceToOutputType<std::string, std::string>(StringPiece piece) { |
27 TrimWhitespace(tmp, TRIM_ALL, &tmp); | 27 return piece.as_string(); |
28 // Avoid converting an empty or all-whitespace source string into a vector | 28 } |
29 // of one empty string. | 29 template <> // Convert StringPiece16 to string16. |
30 if (i != c || !r->empty() || !tmp.empty()) | 30 string16 PieceToOutputType<string16, string16>(StringPiece16 piece) { |
31 r->push_back(tmp); | 31 return piece.as_string(); |
32 last = i + 1; | 32 } |
| 33 |
| 34 // Returns either the ASCII or UTF-16 whitespace. |
| 35 template <typename Str> |
| 36 BasicStringPiece<Str> WhitespaceForType(); |
| 37 template <> |
| 38 StringPiece16 WhitespaceForType<string16>() { |
| 39 return kWhitespaceUTF16; |
| 40 } |
| 41 template <> |
| 42 StringPiece WhitespaceForType<std::string>() { |
| 43 return kWhitespaceASCII; |
| 44 } |
| 45 |
| 46 // Optimize the single-character case to call find() on the string instead, |
| 47 // since this is the common case and can be made faster. This could have been |
| 48 // done with template specialization too, but would have been less clear. |
| 49 // |
| 50 // There is no corresponding FindFirstNotOf because StringPiece already |
| 51 // implements these different versions that do the optimized searching. |
| 52 size_t FindFirstOf(StringPiece piece, char c, size_t pos) { |
| 53 return piece.find(c, pos); |
| 54 } |
| 55 size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) { |
| 56 return piece.find(c, pos); |
| 57 } |
| 58 size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) { |
| 59 return piece.find_first_of(one_of, pos); |
| 60 } |
| 61 size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) { |
| 62 return piece.find_first_of(one_of, pos); |
| 63 } |
| 64 |
| 65 // General string splitter template. Can take 8- or 16-bit input, can produce |
| 66 // the corresponding string or StringPiece output, and can take single- or |
| 67 // multiple-character delimiters. |
| 68 // |
| 69 // DelimiterType is either a character (Str::value_type) or a string piece of |
| 70 // multiple characters (BasicStringPiece<Str>). StringPiece has a version of |
| 71 // find for both of these cases, and the single-character version is the most |
| 72 // common and can be implemented faster, which is why this is a template. |
| 73 template <typename Str, typename OutputStringType, typename DelimiterType> |
| 74 static std::vector<OutputStringType> SplitStringT(BasicStringPiece<Str> str, |
| 75 DelimiterType delimiter, |
| 76 WhitespaceHandling whitespace, |
| 77 SplitResult result_type) { |
| 78 std::vector<OutputStringType> result; |
| 79 if (str.empty()) |
| 80 return result; |
| 81 |
| 82 size_t start = 0; |
| 83 while (start != Str::npos) { |
| 84 size_t end = FindFirstOf(str, delimiter, start); |
| 85 |
| 86 BasicStringPiece<Str> piece; |
| 87 if (end == Str::npos) { |
| 88 piece = str.substr(start); |
| 89 start = Str::npos; |
| 90 } else { |
| 91 piece = str.substr(start, end - start); |
| 92 start = end + 1; |
33 } | 93 } |
| 94 |
| 95 if (whitespace == TRIM_WHITESPACE) |
| 96 piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); |
| 97 |
| 98 if (result_type == SPLIT_WANT_ALL || !piece.empty()) |
| 99 result.push_back(PieceToOutputType<Str, OutputStringType>(piece)); |
34 } | 100 } |
| 101 return result; |
35 } | 102 } |
36 | 103 |
37 bool SplitStringIntoKeyValue(const std::string& line, | 104 bool SplitStringIntoKeyValue(const std::string& line, |
38 char key_value_delimiter, | 105 char key_value_delimiter, |
39 std::string* key, | 106 std::string* key, |
40 std::string* value) { | 107 std::string* value) { |
41 key->clear(); | 108 key->clear(); |
42 value->clear(); | 109 value->clear(); |
43 | 110 |
44 // Find the delimiter. | 111 // Find the delimiter. |
(...skipping 10 matching lines...) Expand all Loading... |
55 if (begin_value_pos == std::string::npos) { | 122 if (begin_value_pos == std::string::npos) { |
56 DVLOG(1) << "cannot parse value from line: " << line; | 123 DVLOG(1) << "cannot parse value from line: " << line; |
57 return false; // no value | 124 return false; // no value |
58 } | 125 } |
59 value->assign(remains, begin_value_pos, remains.size() - begin_value_pos); | 126 value->assign(remains, begin_value_pos, remains.size() - begin_value_pos); |
60 return true; | 127 return true; |
61 } | 128 } |
62 | 129 |
63 template <typename STR> | 130 template <typename STR> |
64 void SplitStringUsingSubstrT(const STR& str, | 131 void SplitStringUsingSubstrT(const STR& str, |
65 const STR& s, | 132 const STR& s, |
66 std::vector<STR>* r) { | 133 std::vector<STR>* r) { |
67 r->clear(); | 134 r->clear(); |
68 typename STR::size_type begin_index = 0; | 135 typename STR::size_type begin_index = 0; |
69 while (true) { | 136 while (true) { |
70 const typename STR::size_type end_index = str.find(s, begin_index); | 137 const typename STR::size_type end_index = str.find(s, begin_index); |
71 if (end_index == STR::npos) { | 138 if (end_index == STR::npos) { |
72 const STR term = str.substr(begin_index); | 139 const STR term = str.substr(begin_index); |
73 STR tmp; | 140 STR tmp; |
74 TrimWhitespace(term, TRIM_ALL, &tmp); | 141 TrimWhitespace(term, TRIM_ALL, &tmp); |
75 r->push_back(tmp); | 142 r->push_back(tmp); |
76 return; | 143 return; |
77 } | 144 } |
78 const STR term = str.substr(begin_index, end_index - begin_index); | 145 const STR term = str.substr(begin_index, end_index - begin_index); |
79 STR tmp; | 146 STR tmp; |
80 TrimWhitespace(term, TRIM_ALL, &tmp); | 147 TrimWhitespace(term, TRIM_ALL, &tmp); |
81 r->push_back(tmp); | 148 r->push_back(tmp); |
82 begin_index = end_index + s.size(); | 149 begin_index = end_index + s.size(); |
83 } | 150 } |
84 } | 151 } |
85 | 152 |
// Returns true when |c| is one of the characters HTML 5 defines as whitespace:
// space, tab, LF, line tab, FF, or CR.
template <typename CHAR>
bool IsHtml5WhitespaceT(CHAR c) {
  switch (c) {
    case L' ':
    case L'\t':
    case L'\xA':
    case L'\xB':
    case L'\xC':
    case L'\xD':
      return true;
    default:
      return false;
  }
}

// Fills |result| with every maximal run of non-whitespace characters found in
// |str|, in order of appearance. Previous contents of |result| are discarded.
// An empty or all-whitespace |str| produces an empty |result|.
template <typename STR>
void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
  result->clear();

  const size_t end = str.length();
  size_t pos = 0;
  while (pos < end) {
    // Skip the whitespace that separates tokens.
    while (pos < end && IsHtml5WhitespaceT(str[pos]))
      ++pos;
    if (pos == end)
      break;

    // Consume one token: everything up to the next whitespace or the end.
    const size_t token_begin = pos;
    while (pos < end && !IsHtml5WhitespaceT(str[pos]))
      ++pos;
    result->push_back(str.substr(token_begin, pos - token_begin));
  }
}
127 } // namespace | 153 } // namespace |
128 | 154 |
129 void SplitString(const string16& str, | 155 std::vector<std::string> SplitString(StringPiece input, |
130 char16 c, | 156 StringPiece separators, |
131 std::vector<string16>* r) { | 157 WhitespaceHandling whitespace, |
| 158 SplitResult result_type) { |
| 159 if (separators.size() == 1) { |
| 160 return SplitStringT<std::string, std::string, char>( |
| 161 input, separators[0], whitespace, result_type); |
| 162 } |
| 163 return SplitStringT<std::string, std::string, StringPiece>( |
| 164 input, separators, whitespace, result_type); |
| 165 } |
| 166 |
| 167 std::vector<string16> SplitString(StringPiece16 input, |
| 168 StringPiece16 separators, |
| 169 WhitespaceHandling whitespace, |
| 170 SplitResult result_type) { |
| 171 if (separators.size() == 1) { |
| 172 return SplitStringT<string16, string16, char16>(input, separators[0], |
| 173 whitespace, result_type); |
| 174 } |
| 175 return SplitStringT<string16, string16, StringPiece16>( |
| 176 input, separators, whitespace, result_type); |
| 177 } |
| 178 |
| 179 std::vector<StringPiece> SplitStringPiece(StringPiece input, |
| 180 StringPiece separators, |
| 181 WhitespaceHandling whitespace, |
| 182 SplitResult result_type) { |
| 183 if (separators.size() == 1) { |
| 184 return SplitStringT<std::string, StringPiece, char>( |
| 185 input, separators[0], whitespace, result_type); |
| 186 } |
| 187 return SplitStringT<std::string, StringPiece, StringPiece>( |
| 188 input, separators, whitespace, result_type); |
| 189 } |
| 190 |
| 191 std::vector<StringPiece16> SplitStringPiece(StringPiece16 input, |
| 192 StringPiece16 separators, |
| 193 WhitespaceHandling whitespace, |
| 194 SplitResult result_type) { |
| 195 if (separators.size() == 1) { |
| 196 return SplitStringT<string16, StringPiece16, char16>( |
| 197 input, separators[0], whitespace, result_type); |
| 198 } |
| 199 return SplitStringT<string16, StringPiece16, StringPiece16>( |
| 200 input, separators, whitespace, result_type); |
| 201 } |
| 202 |
| 203 void SplitString(const string16& str, char16 c, std::vector<string16>* result) { |
132 DCHECK(CBU16_IS_SINGLE(c)); | 204 DCHECK(CBU16_IS_SINGLE(c)); |
133 SplitStringT(str, c, true, r); | 205 *result = SplitStringT<string16, string16, char16>(str, c, TRIM_WHITESPACE, |
| 206 SPLIT_WANT_ALL); |
| 207 |
| 208 // Backward-compat hack: The old SplitString implementation would keep |
| 209 // empty substrings, for example: |
| 210 // "a,,b" -> ["a", "", "b"] |
| 211 // "a, ,b" -> ["a", "", "b"] |
| 212 // which the current code also does. But the old one would discard them when |
| 213 // the only result was that empty string: |
| 214 // " " -> [] |
| 215 // In the latter case, our new code will give [""] |
| 216 if (result->size() == 1 && (*result)[0].empty()) |
| 217 result->clear(); |
134 } | 218 } |
135 | 219 |
136 void SplitString(const std::string& str, | 220 void SplitString(const std::string& str, |
137 char c, | 221 char c, |
138 std::vector<std::string>* r) { | 222 std::vector<std::string>* result) { |
139 #if CHAR_MIN < 0 | 223 #if CHAR_MIN < 0 |
140 DCHECK_GE(c, 0); | 224 DCHECK_GE(c, 0); |
141 #endif | 225 #endif |
142 DCHECK_LT(c, 0x7F); | 226 DCHECK_LT(c, 0x7F); |
143 SplitStringT(str, c, true, r); | 227 *result = SplitStringT<std::string, std::string, char>( |
| 228 str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL); |
| 229 |
| 230 // Backward-compat hack, see above. |
| 231 if (result->size() == 1 && (*result)[0].empty()) |
| 232 result->clear(); |
144 } | 233 } |
145 | 234 |
146 bool SplitStringIntoKeyValuePairs(const std::string& line, | 235 bool SplitStringIntoKeyValuePairs(const std::string& line, |
147 char key_value_delimiter, | 236 char key_value_delimiter, |
148 char key_value_pair_delimiter, | 237 char key_value_pair_delimiter, |
149 StringPairs* key_value_pairs) { | 238 StringPairs* key_value_pairs) { |
150 key_value_pairs->clear(); | 239 key_value_pairs->clear(); |
151 | 240 |
152 std::vector<std::string> pairs; | 241 std::vector<std::string> pairs; |
153 SplitString(line, key_value_pair_delimiter, &pairs); | 242 SplitString(line, key_value_pair_delimiter, &pairs); |
(...skipping 21 matching lines...) Expand all Loading... |
175 std::vector<string16>* r) { | 264 std::vector<string16>* r) { |
176 SplitStringUsingSubstrT(str, s, r); | 265 SplitStringUsingSubstrT(str, s, r); |
177 } | 266 } |
178 | 267 |
179 void SplitStringUsingSubstr(const std::string& str, | 268 void SplitStringUsingSubstr(const std::string& str, |
180 const std::string& s, | 269 const std::string& s, |
181 std::vector<std::string>* r) { | 270 std::vector<std::string>* r) { |
182 SplitStringUsingSubstrT(str, s, r); | 271 SplitStringUsingSubstrT(str, s, r); |
183 } | 272 } |
184 | 273 |
185 void SplitStringDontTrim(const string16& str, | 274 void SplitStringDontTrim(StringPiece16 str, |
186 char16 c, | 275 char16 c, |
187 std::vector<string16>* r) { | 276 std::vector<string16>* result) { |
188 DCHECK(CBU16_IS_SINGLE(c)); | 277 DCHECK(CBU16_IS_SINGLE(c)); |
189 SplitStringT(str, c, false, r); | 278 *result = SplitStringT<string16, string16, char16>(str, c, KEEP_WHITESPACE, |
| 279 SPLIT_WANT_ALL); |
190 } | 280 } |
191 | 281 |
192 void SplitStringDontTrim(const std::string& str, | 282 void SplitStringDontTrim(StringPiece str, |
193 char c, | 283 char c, |
194 std::vector<std::string>* r) { | 284 std::vector<std::string>* result) { |
195 #if CHAR_MIN < 0 | 285 #if CHAR_MIN < 0 |
196 DCHECK_GE(c, 0); | 286 DCHECK_GE(c, 0); |
197 #endif | 287 #endif |
198 DCHECK_LT(c, 0x7F); | 288 DCHECK_LT(c, 0x7F); |
199 SplitStringT(str, c, false, r); | 289 *result = SplitStringT<std::string, std::string, char>( |
| 290 str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL); |
200 } | 291 } |
201 | 292 |
202 void SplitStringAlongWhitespace(const string16& str, | 293 void SplitStringAlongWhitespace(const string16& str, |
203 std::vector<string16>* result) { | 294 std::vector<string16>* result) { |
204 SplitStringAlongWhitespaceT(str, result); | 295 *result = SplitStringT<string16, string16, StringPiece16>( |
| 296 str, StringPiece16(kWhitespaceASCIIAs16), TRIM_WHITESPACE, |
| 297 SPLIT_WANT_NONEMPTY); |
205 } | 298 } |
206 | 299 |
207 void SplitStringAlongWhitespace(const std::string& str, | 300 void SplitStringAlongWhitespace(const std::string& str, |
208 std::vector<std::string>* result) { | 301 std::vector<std::string>* result) { |
209 SplitStringAlongWhitespaceT(str, result); | 302 *result = SplitStringT<std::string, std::string, StringPiece>( |
| 303 str, StringPiece(kWhitespaceASCII), TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY); |
210 } | 304 } |
211 | 305 |
212 } // namespace base | 306 } // namespace base |
OLD | NEW |