| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/strings/string_split.h" | |
| 6 | |
| 7 #include "base/logging.h" | |
| 8 #include "base/strings/string_util.h" | |
| 9 #include "base/third_party/icu/icu_utf.h" | |
| 10 | |
| 11 namespace base { | |
| 12 | |
| 13 namespace { | |
| 14 | |
| 15 // PieceToOutputType converts a StringPiece as needed to a given output type, | |
| 16 // which is either the same type of StringPiece (a NOP) or the corresponding | |
| 17 // non-piece string type. | |
| 18 // | |
| 19 // The default converter is a NOP, it works when the OutputType is the | |
| 20 // correct StringPiece. | |
| 21 template <typename Str, typename OutputType> | |
| 22 OutputType PieceToOutputType(BasicStringPiece<Str> piece) { | |
| 23 return piece; | |
| 24 } | |
| 25 template <> // Convert StringPiece to std::string | |
| 26 std::string PieceToOutputType<std::string, std::string>(StringPiece piece) { | |
| 27 return piece.as_string(); | |
| 28 } | |
| 29 template <> // Convert StringPiece16 to string16. | |
| 30 string16 PieceToOutputType<string16, string16>(StringPiece16 piece) { | |
| 31 return piece.as_string(); | |
| 32 } | |
| 33 | |
| 34 // Returns either the ASCII or UTF-16 whitespace. | |
| 35 template <typename Str> | |
| 36 BasicStringPiece<Str> WhitespaceForType(); | |
| 37 template <> | |
| 38 StringPiece16 WhitespaceForType<string16>() { | |
| 39 return kWhitespaceUTF16; | |
| 40 } | |
| 41 template <> | |
| 42 StringPiece WhitespaceForType<std::string>() { | |
| 43 return kWhitespaceASCII; | |
| 44 } | |
| 45 | |
| 46 // Optimize the single-character case to call find() on the string instead, | |
| 47 // since this is the common case and can be made faster. This could have been | |
| 48 // done with template specialization too, but would have been less clear. | |
| 49 // | |
| 50 // There is no corresponding FindFirstNotOf because StringPiece already | |
| 51 // implements these different versions that do the optimized searching. | |
| 52 size_t FindFirstOf(StringPiece piece, char c, size_t pos) { | |
| 53 return piece.find(c, pos); | |
| 54 } | |
| 55 size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) { | |
| 56 return piece.find(c, pos); | |
| 57 } | |
| 58 size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) { | |
| 59 return piece.find_first_of(one_of, pos); | |
| 60 } | |
| 61 size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) { | |
| 62 return piece.find_first_of(one_of, pos); | |
| 63 } | |
| 64 | |
| 65 // General string splitter template. Can take 8- or 16-bit input, can produce | |
| 66 // the corresponding string or StringPiece output, and can take single- or | |
| 67 // multiple-character delimiters. | |
| 68 // | |
| 69 // DelimiterType is either a character (Str::value_type) or a string piece of | |
| 70 // multiple characters (BasicStringPiece<Str>). StringPiece has a version of | |
| 71 // find for both of these cases, and the single-character version is the most | |
| 72 // common and can be implemented faster, which is why this is a template. | |
| 73 template <typename Str, typename OutputStringType, typename DelimiterType> | |
| 74 static std::vector<OutputStringType> SplitStringT(BasicStringPiece<Str> str, | |
| 75 DelimiterType delimiter, | |
| 76 WhitespaceHandling whitespace, | |
| 77 SplitResult result_type) { | |
| 78 std::vector<OutputStringType> result; | |
| 79 if (str.empty()) | |
| 80 return result; | |
| 81 | |
| 82 size_t start = 0; | |
| 83 while (start != Str::npos) { | |
| 84 size_t end = FindFirstOf(str, delimiter, start); | |
| 85 | |
| 86 BasicStringPiece<Str> piece; | |
| 87 if (end == Str::npos) { | |
| 88 piece = str.substr(start); | |
| 89 start = Str::npos; | |
| 90 } else { | |
| 91 piece = str.substr(start, end - start); | |
| 92 start = end + 1; | |
| 93 } | |
| 94 | |
| 95 if (whitespace == TRIM_WHITESPACE) | |
| 96 piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); | |
| 97 | |
| 98 if (result_type == SPLIT_WANT_ALL || !piece.empty()) | |
| 99 result.push_back(PieceToOutputType<Str, OutputStringType>(piece)); | |
| 100 } | |
| 101 return result; | |
| 102 } | |
| 103 | |
| 104 bool SplitStringIntoKeyValue(const std::string& line, | |
| 105 char key_value_delimiter, | |
| 106 std::string* key, | |
| 107 std::string* value) { | |
| 108 key->clear(); | |
| 109 value->clear(); | |
| 110 | |
| 111 // Find the delimiter. | |
| 112 size_t end_key_pos = line.find_first_of(key_value_delimiter); | |
| 113 if (end_key_pos == std::string::npos) { | |
| 114 DVLOG(1) << "cannot find delimiter in: " << line; | |
| 115 return false; // no delimiter | |
| 116 } | |
| 117 key->assign(line, 0, end_key_pos); | |
| 118 | |
| 119 // Find the value string. | |
| 120 std::string remains(line, end_key_pos, line.size() - end_key_pos); | |
| 121 size_t begin_value_pos = remains.find_first_not_of(key_value_delimiter); | |
| 122 if (begin_value_pos == std::string::npos) { | |
| 123 DVLOG(1) << "cannot parse value from line: " << line; | |
| 124 return false; // no value | |
| 125 } | |
| 126 value->assign(remains, begin_value_pos, remains.size() - begin_value_pos); | |
| 127 return true; | |
| 128 } | |
| 129 | |
| 130 template <typename STR> | |
| 131 void SplitStringUsingSubstrT(const STR& str, | |
| 132 const STR& s, | |
| 133 std::vector<STR>* r) { | |
| 134 r->clear(); | |
| 135 typename STR::size_type begin_index = 0; | |
| 136 while (true) { | |
| 137 const typename STR::size_type end_index = str.find(s, begin_index); | |
| 138 if (end_index == STR::npos) { | |
| 139 const STR term = str.substr(begin_index); | |
| 140 STR tmp; | |
| 141 TrimWhitespace(term, TRIM_ALL, &tmp); | |
| 142 r->push_back(tmp); | |
| 143 return; | |
| 144 } | |
| 145 const STR term = str.substr(begin_index, end_index - begin_index); | |
| 146 STR tmp; | |
| 147 TrimWhitespace(term, TRIM_ALL, &tmp); | |
| 148 r->push_back(tmp); | |
| 149 begin_index = end_index + s.size(); | |
| 150 } | |
| 151 } | |
| 152 | |
| 153 } // namespace | |
| 154 | |
| 155 std::vector<std::string> SplitString(StringPiece input, | |
| 156 StringPiece separators, | |
| 157 WhitespaceHandling whitespace, | |
| 158 SplitResult result_type) { | |
| 159 if (separators.size() == 1) { | |
| 160 return SplitStringT<std::string, std::string, char>( | |
| 161 input, separators[0], whitespace, result_type); | |
| 162 } | |
| 163 return SplitStringT<std::string, std::string, StringPiece>( | |
| 164 input, separators, whitespace, result_type); | |
| 165 } | |
| 166 | |
| 167 std::vector<string16> SplitString(StringPiece16 input, | |
| 168 StringPiece16 separators, | |
| 169 WhitespaceHandling whitespace, | |
| 170 SplitResult result_type) { | |
| 171 if (separators.size() == 1) { | |
| 172 return SplitStringT<string16, string16, char16>(input, separators[0], | |
| 173 whitespace, result_type); | |
| 174 } | |
| 175 return SplitStringT<string16, string16, StringPiece16>( | |
| 176 input, separators, whitespace, result_type); | |
| 177 } | |
| 178 | |
| 179 std::vector<StringPiece> SplitStringPiece(StringPiece input, | |
| 180 StringPiece separators, | |
| 181 WhitespaceHandling whitespace, | |
| 182 SplitResult result_type) { | |
| 183 if (separators.size() == 1) { | |
| 184 return SplitStringT<std::string, StringPiece, char>( | |
| 185 input, separators[0], whitespace, result_type); | |
| 186 } | |
| 187 return SplitStringT<std::string, StringPiece, StringPiece>( | |
| 188 input, separators, whitespace, result_type); | |
| 189 } | |
| 190 | |
| 191 std::vector<StringPiece16> SplitStringPiece(StringPiece16 input, | |
| 192 StringPiece16 separators, | |
| 193 WhitespaceHandling whitespace, | |
| 194 SplitResult result_type) { | |
| 195 if (separators.size() == 1) { | |
| 196 return SplitStringT<string16, StringPiece16, char16>( | |
| 197 input, separators[0], whitespace, result_type); | |
| 198 } | |
| 199 return SplitStringT<string16, StringPiece16, StringPiece16>( | |
| 200 input, separators, whitespace, result_type); | |
| 201 } | |
| 202 | |
| 203 void SplitString(const string16& str, char16 c, std::vector<string16>* result) { | |
| 204 DCHECK(CBU16_IS_SINGLE(c)); | |
| 205 *result = SplitStringT<string16, string16, char16>(str, c, TRIM_WHITESPACE, | |
| 206 SPLIT_WANT_ALL); | |
| 207 | |
| 208 // Backward-compat hack: The old SplitString implementation would keep | |
| 209 // empty substrings, for example: | |
| 210 // "a,,b" -> ["a", "", "b"] | |
| 211 // "a, ,b" -> ["a", "", "b"] | |
| 212 // which the current code also does. But the old one would discard them when | |
| 213 // the only result was that empty string: | |
| 214 // " " -> [] | |
| 215 // In the latter case, our new code will give [""] | |
| 216 if (result->size() == 1 && (*result)[0].empty()) | |
| 217 result->clear(); | |
| 218 } | |
| 219 | |
| 220 void SplitString(const std::string& str, | |
| 221 char c, | |
| 222 std::vector<std::string>* result) { | |
| 223 #if CHAR_MIN < 0 | |
| 224 DCHECK_GE(c, 0); | |
| 225 #endif | |
| 226 DCHECK_LT(c, 0x7F); | |
| 227 *result = SplitStringT<std::string, std::string, char>( | |
| 228 str, c, TRIM_WHITESPACE, SPLIT_WANT_ALL); | |
| 229 | |
| 230 // Backward-compat hack, see above. | |
| 231 if (result->size() == 1 && (*result)[0].empty()) | |
| 232 result->clear(); | |
| 233 } | |
| 234 | |
| 235 bool SplitStringIntoKeyValuePairs(const std::string& line, | |
| 236 char key_value_delimiter, | |
| 237 char key_value_pair_delimiter, | |
| 238 StringPairs* key_value_pairs) { | |
| 239 key_value_pairs->clear(); | |
| 240 | |
| 241 std::vector<std::string> pairs; | |
| 242 SplitString(line, key_value_pair_delimiter, &pairs); | |
| 243 | |
| 244 bool success = true; | |
| 245 for (size_t i = 0; i < pairs.size(); ++i) { | |
| 246 // Don't add empty pairs into the result. | |
| 247 if (pairs[i].empty()) | |
| 248 continue; | |
| 249 | |
| 250 std::string key; | |
| 251 std::string value; | |
| 252 if (!SplitStringIntoKeyValue(pairs[i], key_value_delimiter, &key, &value)) { | |
| 253 // Don't return here, to allow for pairs without associated | |
| 254 // value or key; just record that the split failed. | |
| 255 success = false; | |
| 256 } | |
| 257 key_value_pairs->push_back(make_pair(key, value)); | |
| 258 } | |
| 259 return success; | |
| 260 } | |
| 261 | |
| 262 void SplitStringUsingSubstr(const string16& str, | |
| 263 const string16& s, | |
| 264 std::vector<string16>* r) { | |
| 265 SplitStringUsingSubstrT(str, s, r); | |
| 266 } | |
| 267 | |
| 268 void SplitStringUsingSubstr(const std::string& str, | |
| 269 const std::string& s, | |
| 270 std::vector<std::string>* r) { | |
| 271 SplitStringUsingSubstrT(str, s, r); | |
| 272 } | |
| 273 | |
| 274 void SplitStringDontTrim(StringPiece16 str, | |
| 275 char16 c, | |
| 276 std::vector<string16>* result) { | |
| 277 DCHECK(CBU16_IS_SINGLE(c)); | |
| 278 *result = SplitStringT<string16, string16, char16>(str, c, KEEP_WHITESPACE, | |
| 279 SPLIT_WANT_ALL); | |
| 280 } | |
| 281 | |
| 282 void SplitStringDontTrim(StringPiece str, | |
| 283 char c, | |
| 284 std::vector<std::string>* result) { | |
| 285 #if CHAR_MIN < 0 | |
| 286 DCHECK_GE(c, 0); | |
| 287 #endif | |
| 288 DCHECK_LT(c, 0x7F); | |
| 289 *result = SplitStringT<std::string, std::string, char>( | |
| 290 str, c, KEEP_WHITESPACE, SPLIT_WANT_ALL); | |
| 291 } | |
| 292 | |
| 293 void SplitStringAlongWhitespace(const string16& str, | |
| 294 std::vector<string16>* result) { | |
| 295 *result = SplitStringT<string16, string16, StringPiece16>( | |
| 296 str, StringPiece16(kWhitespaceASCIIAs16), TRIM_WHITESPACE, | |
| 297 SPLIT_WANT_NONEMPTY); | |
| 298 } | |
| 299 | |
| 300 void SplitStringAlongWhitespace(const std::string& str, | |
| 301 std::vector<std::string>* result) { | |
| 302 *result = SplitStringT<std::string, std::string, StringPiece>( | |
| 303 str, StringPiece(kWhitespaceASCII), TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY); | |
| 304 } | |
| 305 | |
| 306 } // namespace base | |
| OLD | NEW |