Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(516)

Side by Side Diff: base/strings/string_tokenizer.h

Issue 1728833002: Add support to StringTokenizer for using braces/brackets as quote chars. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | base/strings/string_tokenizer_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef BASE_STRINGS_STRING_TOKENIZER_H_ 5 #ifndef BASE_STRINGS_STRING_TOKENIZER_H_
6 #define BASE_STRINGS_STRING_TOKENIZER_H_ 6 #define BASE_STRINGS_STRING_TOKENIZER_H_
7 7
8 #include <algorithm> 8 #include <algorithm>
9 #include <string> 9 #include <string>
10 10
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
51 // } 51 // }
52 // 52 //
53 // Output: 53 // Output:
54 // 54 //
55 // no-cache="foo, bar" 55 // no-cache="foo, bar"
56 // private 56 // private
57 // 57 //
58 // 58 //
59 // EXAMPLE 3: 59 // EXAMPLE 3:
60 // 60 //
61 // std::string input = "<foo, bar>, \"bar, foo\", private";
62 // StringTokenizer t(input, ", ");
63 // t.set_paired_quote_chars("\"<", "\">");
64 // while (t.GetNext()) {
65 // printf("%s\n", t.token().c_str());
66 // }
67 //
68 // Output:
69 //
70 // <foo, bar>
71 // "bar, foo"
72 // private
73 //
74 //
75 // EXAMPLE 4:
76 //
61 // bool next_is_option = false, next_is_value = false; 77 // bool next_is_option = false, next_is_value = false;
62 // std::string input = "text/html; charset=UTF-8; foo=bar"; 78 // std::string input = "text/html; charset=UTF-8; foo=bar";
63 // StringTokenizer t(input, "; ="); 79 // StringTokenizer t(input, "; =");
64 // t.set_options(StringTokenizer::RETURN_DELIMS); 80 // t.set_options(StringTokenizer::RETURN_DELIMS);
65 // while (t.GetNext()) { 81 // while (t.GetNext()) {
66 // if (t.token_is_delim()) { 82 // if (t.token_is_delim()) {
67 // switch (*t.token_begin()) { 83 // switch (*t.token_begin()) {
68 // case ';': 84 // case ';':
69 // next_is_option = true; 85 // next_is_option = true;
70 // break; 86 // break;
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
113 } 129 }
114 130
115 // Set the options for this tokenizer. By default, this is 0. 131 // Set the options for this tokenizer. By default, this is 0.
116 void set_options(int options) { options_ = options; } 132 void set_options(int options) { options_ = options; }
117 133
118 // Set the characters to regard as quotes. By default, this is empty. When 134 // Set the characters to regard as quotes. By default, this is empty. When
119 // a quote char is encountered, the tokenizer will switch into a mode where 135 // a quote char is encountered, the tokenizer will switch into a mode where
120 // it ignores delimiters that it finds. It switches out of this mode once it 136 // it ignores delimiters that it finds. It switches out of this mode once it
121 // finds another instance of the quote char. If a backslash is encountered 137 // finds another instance of the quote char. If a backslash is encountered
122 // within a quoted string, then the next character is skipped. 138 // within a quoted string, then the next character is skipped.
123 void set_quote_chars(const str& quotes) { quotes_ = quotes; } 139 void set_quote_chars(const str& quotes) {
140 set_paired_quote_chars(quotes, quotes);
141 }
142
143 // Similar to set_quote_chars, but makes it possible to have a different char
144 // to start and end a quoted string. The open_quotes and close_quotes
145 // parameters must have the same number of characters.
146 void set_paired_quote_chars(const str& open_quotes, const str& close_quotes) {
147 DCHECK_EQ(open_quotes.size(), close_quotes.size());
148 open_quotes_ = open_quotes;
149 close_quotes_ = close_quotes;
150 }
124 151
125 // Call this method to advance the tokenizer to the next delimiter. This 152 // Call this method to advance the tokenizer to the next delimiter. This
126 // returns false if the tokenizer is complete. This method must be called 153 // returns false if the tokenizer is complete. This method must be called
127 // before calling any of the token* methods. 154 // before calling any of the token* methods.
128 bool GetNext() { 155 bool GetNext() {
129 if (quotes_.empty() && options_ == 0) 156 if (open_quotes_.empty() && options_ == 0)
130 return QuickGetNext(); 157 return QuickGetNext();
131 else 158 else
132 return FullGetNext(); 159 return FullGetNext();
133 } 160 }
134 161
135 // Start iterating through tokens from the beginning of the string. 162 // Start iterating through tokens from the beginning of the string.
136 void Reset() { 163 void Reset() {
137 token_end_ = start_pos_; 164 token_end_ = start_pos_;
138 } 165 }
139 166
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
203 } 230 }
204 while (token_end_ != end_ && AdvanceOne(&state, *token_end_)) 231 while (token_end_ != end_ && AdvanceOne(&state, *token_end_))
205 ++token_end_; 232 ++token_end_;
206 return true; 233 return true;
207 } 234 }
208 235
209 bool IsDelim(char_type c) const { 236 bool IsDelim(char_type c) const {
210 return delims_.find(c) != str::npos; 237 return delims_.find(c) != str::npos;
211 } 238 }
212 239
213 bool IsQuote(char_type c) const { 240 bool IsQuote(char_type c, char_type* close_quote) const {
214 return quotes_.find(c) != str::npos; 241 size_t pos = open_quotes_.find(c);
242 if (pos != str::npos)
243 *close_quote = close_quotes_[pos];
244 return pos != str::npos;
215 } 245 }
216 246
217 struct AdvanceState { 247 struct AdvanceState {
218 bool in_quote; 248 bool in_quote;
219 bool in_escape; 249 bool in_escape;
220 char_type quote_char; 250 char_type quote_char;
221 AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {} 251 AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {}
222 }; 252 };
223 253
224 // Returns true if a delimiter was not hit. 254 // Returns true if a delimiter was not hit.
225 bool AdvanceOne(AdvanceState* state, char_type c) { 255 bool AdvanceOne(AdvanceState* state, char_type c) {
226 if (state->in_quote) { 256 if (state->in_quote) {
227 if (state->in_escape) { 257 if (state->in_escape) {
228 state->in_escape = false; 258 state->in_escape = false;
229 } else if (c == '\\') { 259 } else if (c == '\\') {
230 state->in_escape = true; 260 state->in_escape = true;
231 } else if (c == state->quote_char) { 261 } else if (c == state->quote_char) {
232 state->in_quote = false; 262 state->in_quote = false;
233 } 263 }
234 } else { 264 } else {
235 if (IsDelim(c)) 265 if (IsDelim(c))
236 return false; 266 return false;
237 state->in_quote = IsQuote(state->quote_char = c); 267 state->in_quote = IsQuote(c, &state->quote_char);
238 } 268 }
239 return true; 269 return true;
240 } 270 }
241 271
242 const_iterator start_pos_; 272 const_iterator start_pos_;
243 const_iterator token_begin_; 273 const_iterator token_begin_;
244 const_iterator token_end_; 274 const_iterator token_end_;
245 const_iterator end_; 275 const_iterator end_;
246 str delims_; 276 str delims_;
247 str quotes_; 277 str open_quotes_;
278 str close_quotes_;
248 int options_; 279 int options_;
249 bool token_is_delim_; 280 bool token_is_delim_;
250 }; 281 };
251 282
252 typedef StringTokenizerT<std::string, std::string::const_iterator> 283 typedef StringTokenizerT<std::string, std::string::const_iterator>
253 StringTokenizer; 284 StringTokenizer;
254 typedef StringTokenizerT<std::wstring, std::wstring::const_iterator> 285 typedef StringTokenizerT<std::wstring, std::wstring::const_iterator>
255 WStringTokenizer; 286 WStringTokenizer;
256 typedef StringTokenizerT<std::string, const char*> CStringTokenizer; 287 typedef StringTokenizerT<std::string, const char*> CStringTokenizer;
257 288
258 } // namespace base 289 } // namespace base
259 290
260 #endif // BASE_STRINGS_STRING_TOKENIZER_H_ 291 #endif // BASE_STRINGS_STRING_TOKENIZER_H_
OLDNEW
« no previous file with comments | « no previous file | base/strings/string_tokenizer_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698