OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef BASE_STRINGS_STRING_TOKENIZER_H_ | 5 #ifndef BASE_STRINGS_STRING_TOKENIZER_H_ |
6 #define BASE_STRINGS_STRING_TOKENIZER_H_ | 6 #define BASE_STRINGS_STRING_TOKENIZER_H_ |
7 | 7 |
8 #include <algorithm> | 8 #include <algorithm> |
9 #include <string> | 9 #include <string> |
10 | 10 |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
51 // } | 51 // } |
52 // | 52 // |
53 // Output: | 53 // Output: |
54 // | 54 // |
55 // no-cache="foo, bar" | 55 // no-cache="foo, bar" |
56 // private | 56 // private |
57 // | 57 // |
58 // | 58 // |
59 // EXAMPLE 3: | 59 // EXAMPLE 3: |
60 // | 60 // |
| 61 // std::string input = "<foo, bar>, \"bar, foo\", private"; |
| 62 // StringTokenizer t(input, ", "); |
| 63 // t.set_paired_quote_chars("\"<", "\">"); |
| 64 // while (t.GetNext()) { |
| 65 // printf("%s\n", t.token().c_str()); |
| 66 // } |
| 67 // |
| 68 // Output: |
| 69 // |
| 70 // <foo, bar> |
| 71 // "bar, foo" |
| 72 // private |
| 73 // |
| 74 // |
| 75 // EXAMPLE 4: |
| 76 // |
61 // bool next_is_option = false, next_is_value = false; | 77 // bool next_is_option = false, next_is_value = false; |
62 // std::string input = "text/html; charset=UTF-8; foo=bar"; | 78 // std::string input = "text/html; charset=UTF-8; foo=bar"; |
63 // StringTokenizer t(input, "; ="); | 79 // StringTokenizer t(input, "; ="); |
64 // t.set_options(StringTokenizer::RETURN_DELIMS); | 80 // t.set_options(StringTokenizer::RETURN_DELIMS); |
65 // while (t.GetNext()) { | 81 // while (t.GetNext()) { |
66 // if (t.token_is_delim()) { | 82 // if (t.token_is_delim()) { |
67 // switch (*t.token_begin()) { | 83 // switch (*t.token_begin()) { |
68 // case ';': | 84 // case ';': |
69 // next_is_option = true; | 85 // next_is_option = true; |
70 // break; | 86 // break; |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
113 } | 129 } |
114 | 130 |
115 // Set the options for this tokenizer. By default, this is 0. | 131 // Set the options for this tokenizer. By default, this is 0. |
116 void set_options(int options) { options_ = options; } | 132 void set_options(int options) { options_ = options; } |
117 | 133 |
118 // Set the characters to regard as quotes. By default, this is empty. When | 134 // Set the characters to regard as quotes. By default, this is empty. When |
119 // a quote char is encountered, the tokenizer will switch into a mode where | 135 // a quote char is encountered, the tokenizer will switch into a mode where |
120 // it ignores delimiters that it finds. It switches out of this mode once it | 136 // it ignores delimiters that it finds. It switches out of this mode once it |
121 // finds another instance of the quote char. If a backslash is encountered | 137 // finds another instance of the quote char. If a backslash is encountered |
122 // within a quoted string, then the next character is skipped. | 138 // within a quoted string, then the next character is skipped. |
123 void set_quote_chars(const str& quotes) { quotes_ = quotes; } | 139 void set_quote_chars(const str& quotes) { |
| 140 set_paired_quote_chars(quotes, quotes); |
| 141 } |
| 142 |
| 143 // Similar to set_quote_chars, but makes it possible to have a different char |
| 144 // to start and end a quoted string. The open_quotes and close_quotes |
| 145 // parameters must have the same number of characters. |
| 146 void set_paired_quote_chars(const str& open_quotes, const str& close_quotes) { |
| 147 DCHECK_EQ(open_quotes.size(), close_quotes.size()); |
| 148 open_quotes_ = open_quotes; |
| 149 close_quotes_ = close_quotes; |
| 150 } |
124 | 151 |
125 // Call this method to advance the tokenizer to the next delimiter. This | 152 // Call this method to advance the tokenizer to the next delimiter. This |
126 // returns false if the tokenizer is complete. This method must be called | 153 // returns false if the tokenizer is complete. This method must be called |
127 // before calling any of the token* methods. | 154 // before calling any of the token* methods. |
128 bool GetNext() { | 155 bool GetNext() { |
129 if (quotes_.empty() && options_ == 0) | 156 if (open_quotes_.empty() && options_ == 0) |
130 return QuickGetNext(); | 157 return QuickGetNext(); |
131 else | 158 else |
132 return FullGetNext(); | 159 return FullGetNext(); |
133 } | 160 } |
134 | 161 |
135 // Start iterating through tokens from the beginning of the string. | 162 // Start iterating through tokens from the beginning of the string. |
136 void Reset() { | 163 void Reset() { |
137 token_end_ = start_pos_; | 164 token_end_ = start_pos_; |
138 } | 165 } |
139 | 166 |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
203 } | 230 } |
204 while (token_end_ != end_ && AdvanceOne(&state, *token_end_)) | 231 while (token_end_ != end_ && AdvanceOne(&state, *token_end_)) |
205 ++token_end_; | 232 ++token_end_; |
206 return true; | 233 return true; |
207 } | 234 } |
208 | 235 |
209 bool IsDelim(char_type c) const { | 236 bool IsDelim(char_type c) const { |
210 return delims_.find(c) != str::npos; | 237 return delims_.find(c) != str::npos; |
211 } | 238 } |
212 | 239 |
213 bool IsQuote(char_type c) const { | 240 bool IsQuote(char_type c, char_type* close_quote) const { |
214 return quotes_.find(c) != str::npos; | 241 size_t pos = open_quotes_.find(c); |
| 242 if (pos != str::npos) |
| 243 *close_quote = close_quotes_[pos]; |
| 244 return pos != str::npos; |
215 } | 245 } |
216 | 246 |
217 struct AdvanceState { | 247 struct AdvanceState { |
218 bool in_quote; | 248 bool in_quote; |
219 bool in_escape; | 249 bool in_escape; |
220 char_type quote_char; | 250 char_type quote_char; |
221 AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {} | 251 AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {} |
222 }; | 252 }; |
223 | 253 |
224 // Returns true if a delimiter was not hit. | 254 // Returns true if a delimiter was not hit. |
225 bool AdvanceOne(AdvanceState* state, char_type c) { | 255 bool AdvanceOne(AdvanceState* state, char_type c) { |
226 if (state->in_quote) { | 256 if (state->in_quote) { |
227 if (state->in_escape) { | 257 if (state->in_escape) { |
228 state->in_escape = false; | 258 state->in_escape = false; |
229 } else if (c == '\\') { | 259 } else if (c == '\\') { |
230 state->in_escape = true; | 260 state->in_escape = true; |
231 } else if (c == state->quote_char) { | 261 } else if (c == state->quote_char) { |
232 state->in_quote = false; | 262 state->in_quote = false; |
233 } | 263 } |
234 } else { | 264 } else { |
235 if (IsDelim(c)) | 265 if (IsDelim(c)) |
236 return false; | 266 return false; |
237 state->in_quote = IsQuote(state->quote_char = c); | 267 state->in_quote = IsQuote(c, &state->quote_char); |
238 } | 268 } |
239 return true; | 269 return true; |
240 } | 270 } |
241 | 271 |
242 const_iterator start_pos_; | 272 const_iterator start_pos_; |
243 const_iterator token_begin_; | 273 const_iterator token_begin_; |
244 const_iterator token_end_; | 274 const_iterator token_end_; |
245 const_iterator end_; | 275 const_iterator end_; |
246 str delims_; | 276 str delims_; |
247 str quotes_; | 277 str open_quotes_; |
| 278 str close_quotes_; |
248 int options_; | 279 int options_; |
249 bool token_is_delim_; | 280 bool token_is_delim_; |
250 }; | 281 }; |
251 | 282 |
252 typedef StringTokenizerT<std::string, std::string::const_iterator> | 283 typedef StringTokenizerT<std::string, std::string::const_iterator> |
253 StringTokenizer; | 284 StringTokenizer; |
254 typedef StringTokenizerT<std::wstring, std::wstring::const_iterator> | 285 typedef StringTokenizerT<std::wstring, std::wstring::const_iterator> |
255 WStringTokenizer; | 286 WStringTokenizer; |
256 typedef StringTokenizerT<std::string, const char*> CStringTokenizer; | 287 typedef StringTokenizerT<std::string, const char*> CStringTokenizer; |
257 | 288 |
258 } // namespace base | 289 } // namespace base |
259 | 290 |
260 #endif // BASE_STRINGS_STRING_TOKENIZER_H_ | 291 #endif // BASE_STRINGS_STRING_TOKENIZER_H_ |
OLD | NEW |