OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/strings/string_split.h" | 5 #include "base/strings/string_split.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "base/strings/string_util.h" | 8 #include "base/strings/string_util.h" |
9 #include "base/third_party/icu/icu_utf.h" | 9 #include "base/third_party/icu/icu_utf.h" |
10 | 10 |
11 namespace base { | 11 namespace base { |
12 | 12 |
13 namespace { | 13 namespace { |
14 | 14 |
15 template <typename STR> | 15 // PieceToOutputType converts a StringPiece as needed to a given output type, |
16 void SplitStringT(const STR& str, | 16 // which is either the same type of StringPiece (a NOP) or the corresponding |
17 const typename STR::value_type s, | 17 // non-piece string type. |
18 bool trim_whitespace, | 18 // |
19 std::vector<STR>* r) { | 19 // The default converter is a NOP, it works when the OutputType is the |
20 r->clear(); | 20 // correct StringPiece. |
21 size_t last = 0; | 21 template<typename Str, typename OutputType> |
22 size_t c = str.size(); | 22 OutputType PieceToOutputType(BasicStringPiece<Str> piece) { |
23 for (size_t i = 0; i <= c; ++i) { | 23 return piece; |
24 if (i == c || str[i] == s) { | 24 } |
25 STR tmp(str, last, i - last); | 25 template<> // Convert StringPiece to std::string |
26 if (trim_whitespace) | 26 std::string PieceToOutputType<std::string, std::string>(StringPiece piece) { |
27 TrimWhitespace(tmp, TRIM_ALL, &tmp); | 27 return piece.as_string(); |
28 // Avoid converting an empty or all-whitespace source string into a vector | 28 } |
29 // of one empty string. | 29 template<> // Convert StringPiece16 to string16. |
30 if (i != c || !r->empty() || !tmp.empty()) | 30 string16 PieceToOutputType<string16, string16>(StringPiece16 piece) { |
31 r->push_back(tmp); | 31 return piece.as_string(); |
32 last = i + 1; | 32 } |
| 33 |
| 34 // Returns either the ASCII or UTF-16 whitespace. |
| 35 template<typename Str> BasicStringPiece<Str> WhitespaceForType(); |
| 36 template<> StringPiece16 WhitespaceForType<string16>() { |
| 37 return kWhitespaceUTF16; |
| 38 } |
| 39 template<> StringPiece WhitespaceForType<std::string>() { |
| 40 return kWhitespaceASCII; |
| 41 } |
| 42 |
| 43 // Optimize the single-character case to call find() on the string instead, |
| 44 // since this is the common case and can be made faster. This could have been |
| 45 // done with template specialization too, but would have been less clear. |
| 46 // |
| 47 // There is no corresponding FindFirstNotOf because StringPiece already |
| 48 // implements these different versions that do the optimized searching. |
| 49 size_t FindFirstOf(StringPiece piece, char c, size_t pos) { |
| 50 return piece.find(c, pos); |
| 51 } |
| 52 size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) { |
| 53 return piece.find(c, pos); |
| 54 } |
| 55 size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) { |
| 56 return piece.find_first_of(one_of, pos); |
| 57 } |
| 58 size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) { |
| 59 return piece.find_first_of(one_of, pos); |
| 60 } |
| 61 |
| 62 // General string splitter template. Can take 8- or 16-bit input, can produce |
| 63 // the corresponding string or StringPiece output, and can take single- or |
| 64 // multiple-character delimiters. |
| 65 // |
| 66 // DelimiterType is either a character (Str::value_type) or a string piece of |
| 67 // multiple characters (BasicStringPiece<Str>). StringPiece has a version of |
| 68 // find for both of these cases, and the single-character version is the most |
| 69 // common and can be implemented faster, which is why this is a template. |
| 70 template<typename Str, typename OutputStringType, typename DelimiterType> |
| 71 static std::vector<OutputStringType> SplitStringT( |
| 72 BasicStringPiece<Str> str, |
| 73 DelimiterType delimiter, |
| 74 WhitespaceHandling whitespace, |
| 75 SplitResult result_type) { |
| 76 std::vector<OutputStringType> result; |
| 77 if (str.empty()) |
| 78 return result; |
| 79 |
| 80 size_t start = 0; |
| 81 while (start != Str::npos) { |
| 82 size_t end = FindFirstOf(str, delimiter, start); |
| 83 |
| 84 BasicStringPiece<Str> piece; |
| 85 if (end == Str::npos) { |
| 86 piece = str.substr(start); |
| 87 start = Str::npos; |
| 88 } else { |
| 89 piece = str.substr(start, end - start); |
| 90 start = end + 1; |
33 } | 91 } |
| 92 |
| 93 if (whitespace == TRIM_WHITESPACE) |
| 94 piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); |
| 95 |
| 96 if (result_type == SPLIT_WANT_ALL || !piece.empty()) |
| 97 result.push_back(PieceToOutputType<Str, OutputStringType>(piece)); |
34 } | 98 } |
| 99 return result; |
35 } | 100 } |
36 | 101 |
37 bool SplitStringIntoKeyValue(const std::string& line, | 102 bool SplitStringIntoKeyValue(const std::string& line, |
38 char key_value_delimiter, | 103 char key_value_delimiter, |
39 std::string* key, | 104 std::string* key, |
40 std::string* value) { | 105 std::string* value) { |
41 key->clear(); | 106 key->clear(); |
42 value->clear(); | 107 value->clear(); |
43 | 108 |
44 // Find the delimiter. | 109 // Find the delimiter. |
(...skipping 10 matching lines...) Expand all Loading... |
55 if (begin_value_pos == std::string::npos) { | 120 if (begin_value_pos == std::string::npos) { |
56 DVLOG(1) << "cannot parse value from line: " << line; | 121 DVLOG(1) << "cannot parse value from line: " << line; |
57 return false; // no value | 122 return false; // no value |
58 } | 123 } |
59 value->assign(remains, begin_value_pos, remains.size() - begin_value_pos); | 124 value->assign(remains, begin_value_pos, remains.size() - begin_value_pos); |
60 return true; | 125 return true; |
61 } | 126 } |
62 | 127 |
63 template <typename STR> | 128 template <typename STR> |
64 void SplitStringUsingSubstrT(const STR& str, | 129 void SplitStringUsingSubstrT(const STR& str, |
65 const STR& s, | 130 const STR& s, |
66 std::vector<STR>* r) { | 131 std::vector<STR>* r) { |
67 r->clear(); | 132 r->clear(); |
68 typename STR::size_type begin_index = 0; | 133 typename STR::size_type begin_index = 0; |
69 while (true) { | 134 while (true) { |
70 const typename STR::size_type end_index = str.find(s, begin_index); | 135 const typename STR::size_type end_index = str.find(s, begin_index); |
71 if (end_index == STR::npos) { | 136 if (end_index == STR::npos) { |
72 const STR term = str.substr(begin_index); | 137 const STR term = str.substr(begin_index); |
73 STR tmp; | 138 STR tmp; |
74 TrimWhitespace(term, TRIM_ALL, &tmp); | 139 TrimWhitespace(term, TRIM_ALL, &tmp); |
75 r->push_back(tmp); | 140 r->push_back(tmp); |
76 return; | 141 return; |
77 } | 142 } |
78 const STR term = str.substr(begin_index, end_index - begin_index); | 143 const STR term = str.substr(begin_index, end_index - begin_index); |
79 STR tmp; | 144 STR tmp; |
80 TrimWhitespace(term, TRIM_ALL, &tmp); | 145 TrimWhitespace(term, TRIM_ALL, &tmp); |
81 r->push_back(tmp); | 146 r->push_back(tmp); |
82 begin_index = end_index + s.size(); | 147 begin_index = end_index + s.size(); |
83 } | 148 } |
84 } | 149 } |
85 | 150 |
86 template<typename STR> | |
87 void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) { | |
88 result->clear(); | |
89 const size_t length = str.length(); | |
90 if (!length) | |
91 return; | |
92 | |
93 bool last_was_ws = false; | |
94 size_t last_non_ws_start = 0; | |
95 for (size_t i = 0; i < length; ++i) { | |
96 switch (str[i]) { | |
97 // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR. | |
98 case L' ': | |
99 case L'\t': | |
100 case L'\xA': | |
101 case L'\xB': | |
102 case L'\xC': | |
103 case L'\xD': | |
104 if (!last_was_ws) { | |
105 if (i > 0) { | |
106 result->push_back( | |
107 str.substr(last_non_ws_start, i - last_non_ws_start)); | |
108 } | |
109 last_was_ws = true; | |
110 } | |
111 break; | |
112 | |
113 default: // Not a space character. | |
114 if (last_was_ws) { | |
115 last_was_ws = false; | |
116 last_non_ws_start = i; | |
117 } | |
118 break; | |
119 } | |
120 } | |
121 if (!last_was_ws) { | |
122 result->push_back( | |
123 str.substr(last_non_ws_start, length - last_non_ws_start)); | |
124 } | |
125 } | |
126 | |
127 } // namespace | 151 } // namespace |
128 | 152 |
| 153 std::vector<std::string> SplitString(StringPiece input, |
| 154 StringPiece separators, |
| 155 WhitespaceHandling whitespace, |
| 156 SplitResult result_type) { |
| 157 if (separators.size() == 1) { |
| 158 return SplitStringT<std::string, std::string, char>( |
| 159 input, separators[0], whitespace, result_type); |
| 160 } |
| 161 return SplitStringT<std::string, std::string, StringPiece>( |
| 162 input, separators, whitespace, result_type); |
| 163 } |
| 164 |
| 165 std::vector<string16> SplitString(StringPiece16 input, |
| 166 StringPiece16 separators, |
| 167 WhitespaceHandling whitespace, |
| 168 SplitResult result_type) { |
| 169 if (separators.size() == 1) { |
| 170 return SplitStringT<string16, string16, char16>( |
| 171 input, separators[0], whitespace, result_type); |
| 172 } |
| 173 return SplitStringT<string16, string16, StringPiece16>( |
| 174 input, separators, whitespace, result_type); |
| 175 } |
| 176 |
| 177 std::vector<StringPiece> SplitStringPiece(StringPiece input, |
| 178 StringPiece separators, |
| 179 WhitespaceHandling whitespace, |
| 180 SplitResult result_type) { |
| 181 if (separators.size() == 1) { |
| 182 return SplitStringT<std::string, StringPiece, char>( |
| 183 input, separators[0], whitespace, result_type); |
| 184 } |
| 185 return SplitStringT<std::string, StringPiece, StringPiece>( |
| 186 input, separators, whitespace, result_type); |
| 187 } |
| 188 |
| 189 std::vector<StringPiece16> SplitStringPiece(StringPiece16 input, |
| 190 StringPiece16 separators, |
| 191 WhitespaceHandling whitespace, |
| 192 SplitResult result_type) { |
| 193 if (separators.size() == 1) { |
| 194 return SplitStringT<string16, StringPiece16, char16>( |
| 195 input, separators[0], whitespace, result_type); |
| 196 } |
| 197 return SplitStringT<string16, StringPiece16, StringPiece16>( |
| 198 input, separators, whitespace, result_type); |
| 199 } |
| 200 |
129 void SplitString(const string16& str, | 201 void SplitString(const string16& str, |
130 char16 c, | 202 char16 c, |
131 std::vector<string16>* r) { | 203 std::vector<string16>* result) { |
132 DCHECK(CBU16_IS_SINGLE(c)); | 204 DCHECK(CBU16_IS_SINGLE(c)); |
133 SplitStringT(str, c, true, r); | 205 *result = SplitStringT<string16, string16, char16>( |
| 206 str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL); |
| 207 |
| 208 // Backward-compat hack: The old SplitString implementation would keep |
| 209 // empty substrings, for example: |
| 210 // "a,,b" -> ["a", "", "b"] |
| 211 // "a, ,b" -> ["a", "", "b"] |
| 212 // which the current code also does. But the old one would discard them when |
| 213 // the only result was that empty string: |
| 214 // " " -> [] |
| 215 // In the latter case, our new code will give [""] |
| 216 if (result->size() == 1 && (*result)[0].empty()) |
| 217 result->clear(); |
134 } | 218 } |
135 | 219 |
136 void SplitString(const std::string& str, | 220 void SplitString(const std::string& str, |
137 char c, | 221 char c, |
138 std::vector<std::string>* r) { | 222 std::vector<std::string>* result) { |
139 #if CHAR_MIN < 0 | 223 #if CHAR_MIN < 0 |
140 DCHECK_GE(c, 0); | 224 DCHECK_GE(c, 0); |
141 #endif | 225 #endif |
142 DCHECK_LT(c, 0x7F); | 226 DCHECK_LT(c, 0x7F); |
143 SplitStringT(str, c, true, r); | 227 *result = SplitStringT<std::string, std::string, char>( |
| 228 str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL); |
| 229 |
| 230 // Backward-compat hack, see above. |
| 231 if (result->size() == 1 && (*result)[0].empty()) |
| 232 result->clear(); |
| 233 |
144 } | 234 } |
145 | 235 |
146 bool SplitStringIntoKeyValuePairs(const std::string& line, | 236 bool SplitStringIntoKeyValuePairs(const std::string& line, |
147 char key_value_delimiter, | 237 char key_value_delimiter, |
148 char key_value_pair_delimiter, | 238 char key_value_pair_delimiter, |
149 StringPairs* key_value_pairs) { | 239 StringPairs* key_value_pairs) { |
150 key_value_pairs->clear(); | 240 key_value_pairs->clear(); |
151 | 241 |
152 std::vector<std::string> pairs; | 242 std::vector<std::string> pairs; |
153 SplitString(line, key_value_pair_delimiter, &pairs); | 243 SplitString(line, key_value_pair_delimiter, &pairs); |
(...skipping 21 matching lines...) Expand all Loading... |
175 std::vector<string16>* r) { | 265 std::vector<string16>* r) { |
176 SplitStringUsingSubstrT(str, s, r); | 266 SplitStringUsingSubstrT(str, s, r); |
177 } | 267 } |
178 | 268 |
179 void SplitStringUsingSubstr(const std::string& str, | 269 void SplitStringUsingSubstr(const std::string& str, |
180 const std::string& s, | 270 const std::string& s, |
181 std::vector<std::string>* r) { | 271 std::vector<std::string>* r) { |
182 SplitStringUsingSubstrT(str, s, r); | 272 SplitStringUsingSubstrT(str, s, r); |
183 } | 273 } |
184 | 274 |
185 void SplitStringDontTrim(const string16& str, | 275 void SplitStringDontTrim(StringPiece16 str, |
186 char16 c, | 276 char16 c, |
187 std::vector<string16>* r) { | 277 std::vector<string16>* result) { |
188 DCHECK(CBU16_IS_SINGLE(c)); | 278 DCHECK(CBU16_IS_SINGLE(c)); |
189 SplitStringT(str, c, false, r); | 279 *result = SplitStringT<string16, string16, char16>( |
| 280 str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL); |
190 } | 281 } |
191 | 282 |
192 void SplitStringDontTrim(const std::string& str, | 283 void SplitStringDontTrim(StringPiece str, |
193 char c, | 284 char c, |
194 std::vector<std::string>* r) { | 285 std::vector<std::string>* result) { |
195 #if CHAR_MIN < 0 | 286 #if CHAR_MIN < 0 |
196 DCHECK_GE(c, 0); | 287 DCHECK_GE(c, 0); |
197 #endif | 288 #endif |
198 DCHECK_LT(c, 0x7F); | 289 DCHECK_LT(c, 0x7F); |
199 SplitStringT(str, c, false, r); | 290 *result = SplitStringT<std::string, std::string, char>( |
| 291 str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL); |
200 } | 292 } |
201 | 293 |
202 void SplitStringAlongWhitespace(const string16& str, | 294 void SplitStringAlongWhitespace(const string16& str, |
203 std::vector<string16>* result) { | 295 std::vector<string16>* result) { |
204 SplitStringAlongWhitespaceT(str, result); | 296 *result = SplitStringT<string16, string16, StringPiece16>( |
| 297 str, StringPiece16(kWhitespaceASCIIAs16), |
| 298 TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY); |
205 } | 299 } |
206 | 300 |
207 void SplitStringAlongWhitespace(const std::string& str, | 301 void SplitStringAlongWhitespace(const std::string& str, |
208 std::vector<std::string>* result) { | 302 std::vector<std::string>* result) { |
209 SplitStringAlongWhitespaceT(str, result); | 303 *result = SplitStringT<std::string, std::string, StringPiece>( |
| 304 str, StringPiece(kWhitespaceASCII), |
| 305 TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY); |
210 } | 306 } |
211 | 307 |
212 } // namespace base | 308 } // namespace base |
OLD | NEW |