OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/strings/string_split.h" | 5 #include "base/strings/string_split.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "base/strings/string_util.h" | 8 #include "base/strings/string_util.h" |
9 #include "base/third_party/icu/icu_utf.h" | 9 #include "base/third_party/icu/icu_utf.h" |
10 | 10 |
11 namespace base { | 11 namespace base { |
12 | 12 |
13 namespace { | 13 namespace { |
14 | 14 |
15 template <typename STR> | 15 // PieceToOutputType converts a StringPiece as needed to a given output type, |
16 void SplitStringT(const STR& str, | 16 // which is either the same type of StringPiece (a NOP) or the corresponding |
17 const typename STR::value_type s, | 17 // non-piece string type. |
18 bool trim_whitespace, | 18 // |
19 std::vector<STR>* r) { | 19 // The default converter is a NOP, it works when the OutputType is the |
20 r->clear(); | 20 // correct StringPiece. |
21 size_t last = 0; | 21 template<typename Str, typename OutputType> |
22 size_t c = str.size(); | 22 OutputType PieceToOutputType(BasicStringPiece<Str> piece) { |
23 for (size_t i = 0; i <= c; ++i) { | 23 return piece; |
24 if (i == c || str[i] == s) { | 24 } |
25 STR tmp(str, last, i - last); | 25 template<> // Convert StringPiece to std::string |
26 if (trim_whitespace) | 26 std::string PieceToOutputType<std::string, std::string>(StringPiece piece) { |
27 TrimWhitespace(tmp, TRIM_ALL, &tmp); | 27 return piece.as_string(); |
28 // Avoid converting an empty or all-whitespace source string into a vector | 28 } |
29 // of one empty string. | 29 template<> // Convert StringPiece16 to string16. |
30 if (i != c || !r->empty() || !tmp.empty()) | 30 string16 PieceToOutputType<string16, string16>(StringPiece16 piece) { |
31 r->push_back(tmp); | 31 return piece.as_string(); |
32 last = i + 1; | 32 } |
33 | |
34 // Returns either the ASCII or UTF-16 whitespace. | |
35 template<typename Str> BasicStringPiece<Str> WhitespaceForType(); | |
36 template<> StringPiece16 WhitespaceForType<string16>() { | |
37 return kWhitespaceUTF16; | |
38 } | |
39 template<> StringPiece WhitespaceForType<std::string>() { | |
40 return kWhitespaceASCII; | |
41 } | |
42 | |
43 // Optimize the single-character case to call find() on the string instead, | |
44 // since this is the common case and can be made faster. This could have been | |
45 // done with template specialization too, but would have been less clear. | |
46 // | |
47 // There is no corresponding FindFirstNotOf because StringPiece already | |
48 // implements these different versions that do the optimized searching. | |
49 size_t FindFirstOf(StringPiece piece, char c, size_t pos) { | |
50 return piece.find(c, pos); | |
51 } | |
52 size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) { | |
53 return piece.find(c, pos); | |
54 } | |
55 size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) { | |
56 return piece.find_first_of(one_of, pos); | |
57 } | |
58 size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) { | |
59 return piece.find_first_of(one_of, pos); | |
60 } | |
61 | |
62 // General string splitter template. Can take 8- or 16-bit input, can produce | |
63 // the corresponding string or StringPiece output, and can take single- or | |
64 // multiple-character delimiters. | |
65 // | |
66 // DelimiterType is either a character (Str::value_type) or a string piece of | |
danakj
2015/06/11 23:58:34
typo here with value_type>
| |
67 // multiple characters (BasicStringPiece<Str>). StringPiece has a version of | |
danakj
2015/06/11 23:58:34
has a version
| |
68 // find for both of these cases, and the single-character version is the most | |
69 // common and can be implemented faster, which is why this is a template. | |
70 template<typename Str, typename OutputStringType, typename DelimiterType> | |
71 static std::vector<OutputStringType> SplitStringT( | |
72 BasicStringPiece<Str> str, | |
73 DelimiterType delimiter, | |
74 WhitespaceHandling whitespace, | |
75 SplitResult result_type) { | |
76 std::vector<OutputStringType> result; | |
77 if (str.empty()) | |
78 return result; | |
79 | |
80 size_t start = 0; | |
81 while (start != Str::npos) { | |
82 size_t end = FindFirstOf(str, delimiter, start); | |
83 | |
84 BasicStringPiece<Str> piece; | |
85 if (end == Str::npos) { | |
86 piece = str.substr(start); | |
87 start = Str::npos; | |
88 } else { | |
89 piece = str.substr(start, end - start); | |
90 start = end + 1; | |
33 } | 91 } |
92 | |
93 if (whitespace == TRIM_WHITESPACE) | |
94 piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); | |
95 | |
96 if (result_type == SPLIT_WANT_ALL || !piece.empty()) | |
97 result.push_back(PieceToOutputType<Str, OutputStringType>(piece)); | |
34 } | 98 } |
99 return result; | |
35 } | 100 } |
36 | 101 |
37 bool SplitStringIntoKeyValue(const std::string& line, | 102 bool SplitStringIntoKeyValue(const std::string& line, |
38 char key_value_delimiter, | 103 char key_value_delimiter, |
39 std::string* key, | 104 std::string* key, |
40 std::string* value) { | 105 std::string* value) { |
41 key->clear(); | 106 key->clear(); |
42 value->clear(); | 107 value->clear(); |
43 | 108 |
44 // Find the delimiter. | 109 // Find the delimiter. |
(...skipping 10 matching lines...) Expand all Loading... | |
55 if (begin_value_pos == std::string::npos) { | 120 if (begin_value_pos == std::string::npos) { |
56 DVLOG(1) << "cannot parse value from line: " << line; | 121 DVLOG(1) << "cannot parse value from line: " << line; |
57 return false; // no value | 122 return false; // no value |
58 } | 123 } |
59 value->assign(remains, begin_value_pos, remains.size() - begin_value_pos); | 124 value->assign(remains, begin_value_pos, remains.size() - begin_value_pos); |
60 return true; | 125 return true; |
61 } | 126 } |
62 | 127 |
63 template <typename STR> | 128 template <typename STR> |
64 void SplitStringUsingSubstrT(const STR& str, | 129 void SplitStringUsingSubstrT(const STR& str, |
65 const STR& s, | 130 const STR& s, |
66 std::vector<STR>* r) { | 131 std::vector<STR>* r) { |
67 r->clear(); | 132 r->clear(); |
68 typename STR::size_type begin_index = 0; | 133 typename STR::size_type begin_index = 0; |
69 while (true) { | 134 while (true) { |
70 const typename STR::size_type end_index = str.find(s, begin_index); | 135 const typename STR::size_type end_index = str.find(s, begin_index); |
71 if (end_index == STR::npos) { | 136 if (end_index == STR::npos) { |
72 const STR term = str.substr(begin_index); | 137 const STR term = str.substr(begin_index); |
73 STR tmp; | 138 STR tmp; |
74 TrimWhitespace(term, TRIM_ALL, &tmp); | 139 TrimWhitespace(term, TRIM_ALL, &tmp); |
75 r->push_back(tmp); | 140 r->push_back(tmp); |
76 return; | 141 return; |
77 } | 142 } |
78 const STR term = str.substr(begin_index, end_index - begin_index); | 143 const STR term = str.substr(begin_index, end_index - begin_index); |
79 STR tmp; | 144 STR tmp; |
80 TrimWhitespace(term, TRIM_ALL, &tmp); | 145 TrimWhitespace(term, TRIM_ALL, &tmp); |
81 r->push_back(tmp); | 146 r->push_back(tmp); |
82 begin_index = end_index + s.size(); | 147 begin_index = end_index + s.size(); |
83 } | 148 } |
84 } | 149 } |
85 | 150 |
86 template<typename STR> | |
87 void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) { | |
88 result->clear(); | |
89 const size_t length = str.length(); | |
90 if (!length) | |
91 return; | |
92 | |
93 bool last_was_ws = false; | |
94 size_t last_non_ws_start = 0; | |
95 for (size_t i = 0; i < length; ++i) { | |
96 switch (str[i]) { | |
97 // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR. | |
98 case L' ': | |
99 case L'\t': | |
100 case L'\xA': | |
101 case L'\xB': | |
102 case L'\xC': | |
103 case L'\xD': | |
104 if (!last_was_ws) { | |
105 if (i > 0) { | |
106 result->push_back( | |
107 str.substr(last_non_ws_start, i - last_non_ws_start)); | |
108 } | |
109 last_was_ws = true; | |
110 } | |
111 break; | |
112 | |
113 default: // Not a space character. | |
114 if (last_was_ws) { | |
115 last_was_ws = false; | |
116 last_non_ws_start = i; | |
117 } | |
118 break; | |
119 } | |
120 } | |
121 if (!last_was_ws) { | |
122 result->push_back( | |
123 str.substr(last_non_ws_start, length - last_non_ws_start)); | |
124 } | |
125 } | |
126 | |
127 } // namespace | 151 } // namespace |
128 | 152 |
153 std::vector<std::string> SplitString(StringPiece input, | |
154 StringPiece separators, | |
155 WhitespaceHandling whitespace, | |
156 SplitResult result_type) { | |
157 if (separators.size() == 1) { | |
158 return SplitStringT<std::string, std::string, char>( | |
159 input, separators[0], whitespace, result_type); | |
160 } | |
161 return SplitStringT<std::string, std::string, StringPiece>( | |
162 input, separators, whitespace, result_type); | |
163 } | |
164 | |
165 std::vector<string16> SplitString(StringPiece16 input, | |
166 StringPiece16 separators, | |
167 WhitespaceHandling whitespace, | |
168 SplitResult result_type) { | |
169 if (separators.size() == 1) { | |
170 return SplitStringT<string16, string16, char16>( | |
171 input, separators[0], whitespace, result_type); | |
172 } | |
173 return SplitStringT<string16, string16, StringPiece16>( | |
174 input, separators, whitespace, result_type); | |
175 } | |
176 | |
177 std::vector<StringPiece> SplitStringPiece(StringPiece input, | |
178 StringPiece separators, | |
179 WhitespaceHandling whitespace, | |
180 SplitResult result_type) { | |
181 if (separators.size() == 1) { | |
182 return SplitStringT<std::string, StringPiece, char>( | |
183 input, separators[0], whitespace, result_type); | |
184 } | |
185 return SplitStringT<std::string, StringPiece, StringPiece>( | |
186 input, separators, whitespace, result_type); | |
187 } | |
188 | |
189 std::vector<StringPiece16> SplitStringPiece(StringPiece16 input, | |
190 StringPiece16 separators, | |
191 WhitespaceHandling whitespace, | |
192 SplitResult result_type) { | |
193 if (separators.size() == 1) { | |
194 return SplitStringT<string16, StringPiece16, char16>( | |
195 input, separators[0], whitespace, result_type); | |
196 } | |
197 return SplitStringT<string16, StringPiece16, StringPiece16>( | |
198 input, separators, whitespace, result_type); | |
199 } | |
200 | |
129 void SplitString(const string16& str, | 201 void SplitString(const string16& str, |
130 char16 c, | 202 char16 c, |
131 std::vector<string16>* r) { | 203 std::vector<string16>* result) { |
132 DCHECK(CBU16_IS_SINGLE(c)); | 204 DCHECK(CBU16_IS_SINGLE(c)); |
133 SplitStringT(str, c, true, r); | 205 *result = SplitStringT<string16, string16, char16>( |
206 str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL); | |
207 | |
208 // Backward-compat hack: The old SplitString implementation would keep | |
209 // empty substrings, for example: | |
210 // "a,,b" -> ["a", "", "b"] | |
211 // "a, ,b" -> ["a", "", "b"] | |
212 // which the current code also does. But the old one would discard them when | |
213 // the only result was that empty string: | |
214 // " " -> [] | |
215 // In the latter case, our new code will give [""] | |
216 if (result->size() == 1 && (*result)[0].empty()) | |
217 result->clear(); | |
134 } | 218 } |
135 | 219 |
136 void SplitString(const std::string& str, | 220 void SplitString(const std::string& str, |
137 char c, | 221 char c, |
138 std::vector<std::string>* r) { | 222 std::vector<std::string>* result) { |
139 #if CHAR_MIN < 0 | 223 #if CHAR_MIN < 0 |
140 DCHECK_GE(c, 0); | 224 DCHECK_GE(c, 0); |
141 #endif | 225 #endif |
142 DCHECK_LT(c, 0x7F); | 226 DCHECK_LT(c, 0x7F); |
143 SplitStringT(str, c, true, r); | 227 *result = SplitStringT<std::string, std::string, char>( |
228 str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL); | |
229 | |
230 // Backward-compat hack, see above. | |
231 if (result->size() == 1 && (*result)[0].empty()) | |
232 result->clear(); | |
233 | |
144 } | 234 } |
145 | 235 |
146 bool SplitStringIntoKeyValuePairs(const std::string& line, | 236 bool SplitStringIntoKeyValuePairs(const std::string& line, |
147 char key_value_delimiter, | 237 char key_value_delimiter, |
148 char key_value_pair_delimiter, | 238 char key_value_pair_delimiter, |
149 StringPairs* key_value_pairs) { | 239 StringPairs* key_value_pairs) { |
150 key_value_pairs->clear(); | 240 key_value_pairs->clear(); |
151 | 241 |
152 std::vector<std::string> pairs; | 242 std::vector<std::string> pairs; |
153 SplitString(line, key_value_pair_delimiter, &pairs); | 243 SplitString(line, key_value_pair_delimiter, &pairs); |
(...skipping 21 matching lines...) Expand all Loading... | |
175 std::vector<string16>* r) { | 265 std::vector<string16>* r) { |
176 SplitStringUsingSubstrT(str, s, r); | 266 SplitStringUsingSubstrT(str, s, r); |
177 } | 267 } |
178 | 268 |
179 void SplitStringUsingSubstr(const std::string& str, | 269 void SplitStringUsingSubstr(const std::string& str, |
180 const std::string& s, | 270 const std::string& s, |
181 std::vector<std::string>* r) { | 271 std::vector<std::string>* r) { |
182 SplitStringUsingSubstrT(str, s, r); | 272 SplitStringUsingSubstrT(str, s, r); |
183 } | 273 } |
184 | 274 |
185 void SplitStringDontTrim(const string16& str, | 275 void SplitStringDontTrim(StringPiece16 str, |
186 char16 c, | 276 char16 c, |
187 std::vector<string16>* r) { | 277 std::vector<string16>* result) { |
188 DCHECK(CBU16_IS_SINGLE(c)); | 278 DCHECK(CBU16_IS_SINGLE(c)); |
189 SplitStringT(str, c, false, r); | 279 *result = SplitStringT<string16, string16, char16>( |
280 str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL); | |
190 } | 281 } |
191 | 282 |
192 void SplitStringDontTrim(const std::string& str, | 283 void SplitStringDontTrim(StringPiece str, |
193 char c, | 284 char c, |
194 std::vector<std::string>* r) { | 285 std::vector<std::string>* result) { |
195 #if CHAR_MIN < 0 | 286 #if CHAR_MIN < 0 |
196 DCHECK_GE(c, 0); | 287 DCHECK_GE(c, 0); |
197 #endif | 288 #endif |
198 DCHECK_LT(c, 0x7F); | 289 DCHECK_LT(c, 0x7F); |
199 SplitStringT(str, c, false, r); | 290 *result = SplitStringT<std::string, std::string, char>( |
291 str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL); | |
200 } | 292 } |
201 | 293 |
202 void SplitStringAlongWhitespace(const string16& str, | 294 void SplitStringAlongWhitespace(const string16& str, |
203 std::vector<string16>* result) { | 295 std::vector<string16>* result) { |
204 SplitStringAlongWhitespaceT(str, result); | 296 *result = SplitStringT<string16, string16, StringPiece16>( |
297 str, StringPiece16(kWhitespaceASCIIAs16), | |
298 TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY); | |
205 } | 299 } |
206 | 300 |
207 void SplitStringAlongWhitespace(const std::string& str, | 301 void SplitStringAlongWhitespace(const std::string& str, |
208 std::vector<std::string>* result) { | 302 std::vector<std::string>* result) { |
209 SplitStringAlongWhitespaceT(str, result); | 303 *result = SplitStringT<std::string, std::string, StringPiece>( |
304 str, StringPiece(kWhitespaceASCII), | |
305 TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY); | |
210 } | 306 } |
211 | 307 |
212 } // namespace base | 308 } // namespace base |
OLD | NEW |