OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef BASE_STRINGS_STRING_SPLIT_H_ | 5 #ifndef BASE_STRINGS_STRING_SPLIT_H_ |
6 #define BASE_STRINGS_STRING_SPLIT_H_ | 6 #define BASE_STRINGS_STRING_SPLIT_H_ |
7 | 7 |
8 #include <string> | 8 #include <string> |
9 #include <utility> | 9 #include <utility> |
10 #include <vector> | 10 #include <vector> |
11 | 11 |
12 #include "base/base_export.h" | 12 #include "base/base_export.h" |
13 #include "base/strings/string16.h" | 13 #include "base/strings/string16.h" |
14 #include "base/strings/string_piece.h" | |
14 | 15 |
15 namespace base { | 16 namespace base { |
16 | 17 |
17 // Splits |str| into a vector of strings delimited by |c|, placing the results | 18 enum WhitespaceHandling { |
18 // in |r|. If several instances of |c| are contiguous, or if |str| begins with | 19 KEEP_WHITESPACE, |
19 // or ends with |c|, then an empty string is inserted. | 20 TRIM_WHITESPACE, |
21 }; | |
22 | |
23 enum SplitResult { | |
24 // Strictly return all results. | |
25 // | |
26 // If the input is ",," and the separator is ',', this will return a | |
danakj
2015/06/12 17:58:12
thanks this is nicer
| |
27 // vector of three empty strings. | |
28 SPLIT_WANT_ALL, | |
29 | |
30 // Only nonempty results will be added to the results. Multiple separators | |
31 // will be coalesced. Separators at the beginning and end of the input will | |
32 // be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped. | |
33 // | |
34 // If the input is ",," and the separator is ',', this will return an empty | |
35 // vector. | |
36 SPLIT_WANT_NONEMPTY, | |
37 }; | |
38 | |
39 // Split the given string on ANY of the given separators, returning copies of | |
40 // the result. | |
20 // | 41 // |
21 // Every substring is trimmed of any leading or trailing white space. | 42 // To split on either commas or semicolons, keeping all whitespace: |
22 // NOTE: |c| must be in BMP (Basic Multilingual Plane) | 43 // |
23 BASE_EXPORT void SplitString(const string16& str, | 44 // std::vector<std::string> tokens = base::SplitString( |
24 char16 c, | 45 // input, ",;", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL); |
25 std::vector<string16>* r); | 46 BASE_EXPORT std::vector<std::string> SplitString( |
47 StringPiece input, | |
48 StringPiece separators, | |
49 WhitespaceHandling whitespace, | |
50 SplitResult result_type); | |
51 BASE_EXPORT std::vector<string16> SplitString( | |
52 StringPiece16 input, | |
53 StringPiece16 separators, | |
54 WhitespaceHandling whitespace, | |
55 SplitResult result_type); | |
26 | 56 |
27 // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which | 57 // Like SplitString above except it returns a vector of StringPieces which |
28 // the trailing byte of a multi-byte character can be in the ASCII range. | 58 // reference the original buffer without copying. Although you have to be |
29 // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. | 59 // careful to keep the original string unmodified, this provides an efficient |
30 // Note: |c| must be in the ASCII range. | 60 // way to iterate through tokens in a string. |
31 BASE_EXPORT void SplitString(const std::string& str, | 61 // |
32 char c, | 62 // To iterate through all whitespace-separated tokens in an input string: |
33 std::vector<std::string>* r); | 63 // |
64 // for (const auto& cur : | |
65 // base::SplitStringPiece(input, base::kWhitespaceASCII, | |
66 // base::KEEP_WHITESPACE, | |
danakj
2015/06/12 17:58:12
Just to sanity check, KEEP or TRIM whitespace here
brettw
2015/06/12 18:09:45
Correct. I wrote this since then the code won't ev
| |
67 // base::SPLIT_WANT_NONEMPTY)) { | |
68 // ... | |
69 BASE_EXPORT std::vector<StringPiece> SplitStringPiece( | |
70 StringPiece input, | |
71 StringPiece separators, | |
72 WhitespaceHandling whitespace, | |
73 SplitResult result_type); | |
74 BASE_EXPORT std::vector<StringPiece16> SplitStringPiece( | |
75 StringPiece16 input, | |
76 StringPiece16 separators, | |
77 WhitespaceHandling whitespace, | |
78 SplitResult result_type); | |
34 | 79 |
35 typedef std::vector<std::pair<std::string, std::string> > StringPairs; | 80 typedef std::vector<std::pair<std::string, std::string>> StringPairs; |
danakj
2015/06/12 17:58:12
nit: while you're changing this you could make it
brettw
2015/06/12 18:09:45
Sure.
| |
36 | 81 |
37 // Splits |line| into key value pairs according to the given delimiters and | 82 // Splits |line| into key value pairs according to the given delimiters and |
38 // removes whitespace leading each key and trailing each value. Returns true | 83 // removes whitespace leading each key and trailing each value. Returns true |
39 // only if each pair has a non-empty key and value. |key_value_pairs| will | 84 // only if each pair has a non-empty key and value. |key_value_pairs| will |
40 // include ("","") pairs for entries without |key_value_delimiter|. | 85 // include ("","") pairs for entries without |key_value_delimiter|. |
41 BASE_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line, | 86 BASE_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line, |
42 char key_value_delimiter, | 87 char key_value_delimiter, |
43 char key_value_pair_delimiter, | 88 char key_value_pair_delimiter, |
44 StringPairs* key_value_pairs); | 89 StringPairs* key_value_pairs); |
45 | 90 |
46 // The same as SplitString, but use a substring delimiter instead of a char. | 91 // Similar to SplitString, but use a substring delimiter instead of a list of |
92 // characters that are all possible delimiters. | |
93 // | |
94 // TODO(brettw) this should probably be changed and expanded to provide a | |
95 // mirror of the SplitString[Piece] API above, just with the different | |
96 // delimiter handling. | |
47 BASE_EXPORT void SplitStringUsingSubstr(const string16& str, | 97 BASE_EXPORT void SplitStringUsingSubstr(const string16& str, |
48 const string16& s, | 98 const string16& s, |
49 std::vector<string16>* r); | 99 std::vector<string16>* r); |
50 BASE_EXPORT void SplitStringUsingSubstr(const std::string& str, | 100 BASE_EXPORT void SplitStringUsingSubstr(const std::string& str, |
51 const std::string& s, | 101 const std::string& s, |
52 std::vector<std::string>* r); | 102 std::vector<std::string>* r); |
53 | 103 |
104 // ----------------------------------------------------------------------------- | |
105 // Backwards-compat wrappers | |
106 // | |
107 // New code should use one of the more general variants above. | |
108 // TODO(brettw) remove these and convert to the versions above. | |
109 | |
110 // Splits |str| into a vector of strings delimited by |c|, placing the results | |
111 // in |r|. If several instances of |c| are contiguous, or if |str| begins with | |
112 // or ends with |c|, then an empty string is inserted. | |
113 // | |
114 // Every substring is trimmed of any leading or trailing white space. | |
115 // NOTE: |c| must be in BMP (Basic Multilingual Plane) | |
116 BASE_EXPORT void SplitString(const string16& str, | |
117 char16 c, | |
118 std::vector<string16>* r); | |
119 | |
120 // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which | |
121 // the trailing byte of a multi-byte character can be in the ASCII range. | |
122 // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. | |
123 // Note: |c| must be in the ASCII range. | |
124 BASE_EXPORT void SplitString(const std::string& str, | |
125 char c, | |
126 std::vector<std::string>* r); | |
127 | |
54 // The same as SplitString, but don't trim white space. | 128 // The same as SplitString, but don't trim white space. |
55 // NOTE: |c| must be in BMP (Basic Multilingual Plane) | 129 // NOTE: |c| must be in BMP (Basic Multilingual Plane) |
56 BASE_EXPORT void SplitStringDontTrim(const string16& str, | 130 BASE_EXPORT void SplitStringDontTrim(StringPiece16 str, |
57 char16 c, | 131 char16 c, |
58 std::vector<string16>* r); | 132 std::vector<string16>* r); |
59 // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which | 133 // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which |
60 // the trailing byte of a multi-byte character can be in the ASCII range. | 134 // the trailing byte of a multi-byte character can be in the ASCII range. |
61 // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. | 135 // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. |
62 // Note: |c| must be in the ASCII range. | 136 // Note: |c| must be in the ASCII range. |
63 BASE_EXPORT void SplitStringDontTrim(const std::string& str, | 137 BASE_EXPORT void SplitStringDontTrim(StringPiece str, |
64 char c, | 138 char c, |
65 std::vector<std::string>* r); | 139 std::vector<std::string>* result); |
66 | 140 |
67 // WARNING: this uses whitespace as defined by the HTML5 spec. If you need | 141 // WARNING: this uses whitespace as defined by the HTML5 spec (ASCII whitespace |
68 // a function similar to this but want to trim all types of whitespace, then | 142 // only). |
69 // factor this out into a function that takes a string containing the characters | |
70 // that are treated as whitespace. | |
71 // | 143 // |
72 // Splits the string along whitespace (where whitespace is the five space | 144 // The difference between this and calling SplitString with the whitespace |
73 // characters defined by HTML 5). Each contiguous block of non-whitespace | 145 // characters as separators is the treatment of the first element when the |
74 // characters is added to result. | 146 // string starts with whitespace. |
147 // | |
148 //   Input        SplitString      SplitStringAlongWhitespace | |
149 //   -------------------------------------------------------- | |
150 //   " a "        "", "a"          "a" | |
75 BASE_EXPORT void SplitStringAlongWhitespace(const string16& str, | 151 BASE_EXPORT void SplitStringAlongWhitespace(const string16& str, |
76 std::vector<string16>* result); | 152 std::vector<string16>* result); |
77 BASE_EXPORT void SplitStringAlongWhitespace(const std::string& str, | 153 BASE_EXPORT void SplitStringAlongWhitespace(const std::string& str, |
78 std::vector<std::string>* result); | 154 std::vector<std::string>* result); |
79 | 155 |
80 } // namespace base | 156 } // namespace base |
81 | 157 |
82 #endif // BASE_STRINGS_STRING_SPLIT_H_ | 158 #endif // BASE_STRINGS_STRING_SPLIT_H_ |
OLD | NEW |