base/strings/string_tokenizer.h - Issue 1728833002: Add support to StringTokenizer for using braces/brackets as quote chars.

Side by Side Diff: base/strings/string_tokenizer.h

Issue 1728833002: Add support to StringTokenizer for using braces/brackets as quote chars. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef BASE_STRINGS_STRING_TOKENIZER_H_	5 #ifndef BASE_STRINGS_STRING_TOKENIZER_H_

6 #define BASE_STRINGS_STRING_TOKENIZER_H_	6 #define BASE_STRINGS_STRING_TOKENIZER_H_

7	7

8 #include <algorithm>	8 #include <algorithm>

9 #include <string>	9 #include <string>

10	10

(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
51 // }	51 // }

52 //	52 //

53 // Output:	53 // Output:

54 //	54 //

55 // no-cache="foo, bar"	55 // no-cache="foo, bar"

56 // private	56 // private

57 //	57 //

58 //	58 //

59 // EXAMPLE 3:	59 // EXAMPLE 3:

60 //	60 //

	61 // std::string input = "<foo, bar>, \"bar, foo\", private";

	62 // StringTokenizer t(input, ", ");

	63 // t.set_paired_quote_chars("\"<", "\">");

	64 // while (t.GetNext()) {

	65 // printf("%s\n", t.token().c_str());

	66 // }

	67 //

	68 // Output:

	69 //

	70 // <foo, bar>

	71 // "bar, foo"

	72 // private

	73 //

	74 //

	75 // EXAMPLE 4:

	76 //

61 // bool next_is_option = false, next_is_value = false;	77 // bool next_is_option = false, next_is_value = false;

62 // std::string input = "text/html; charset=UTF-8; foo=bar";	78 // std::string input = "text/html; charset=UTF-8; foo=bar";

63 // StringTokenizer t(input, "; =");	79 // StringTokenizer t(input, "; =");

64 // t.set_options(StringTokenizer::RETURN_DELIMS);	80 // t.set_options(StringTokenizer::RETURN_DELIMS);

65 // while (t.GetNext()) {	81 // while (t.GetNext()) {

66 // if (t.token_is_delim()) {	82 // if (t.token_is_delim()) {

67 // switch (*t.token_begin()) {	83 // switch (*t.token_begin()) {

68 // case ';':	84 // case ';':

69 // next_is_option = true;	85 // next_is_option = true;

70 // break;	86 // break;

(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
113 }	129 }

114	130

115 // Set the options for this tokenizer. By default, this is 0.	131 // Set the options for this tokenizer. By default, this is 0.

116 void set_options(int options) { options_ = options; }	132 void set_options(int options) { options_ = options; }

117	133

118 // Set the characters to regard as quotes. By default, this is empty. When	134 // Set the characters to regard as quotes. By default, this is empty. When

119 // a quote char is encountered, the tokenizer will switch into a mode where	135 // a quote char is encountered, the tokenizer will switch into a mode where

120 // it ignores delimiters that it finds. It switches out of this mode once it	136 // it ignores delimiters that it finds. It switches out of this mode once it

121 // finds another instance of the quote char. If a backslash is encountered	137 // finds another instance of the quote char. If a backslash is encountered

122 // within a quoted string, then the next character is skipped.	138 // within a quoted string, then the next character is skipped.

123 void set_quote_chars(const str& quotes) { quotes_ = quotes; }	139 void set_quote_chars(const str& quotes) {

	140 set_paired_quote_chars(quotes, quotes);

	141 }

	142

	143 // Similar to set_quote_chars, but makes it possible to have a different char

	144 // to start and end a quoted string. The open_quotes and close_quotes

	145 // parameters must have the same number of characters.

	146 void set_paired_quote_chars(const str& open_quotes, const str& close_quotes) {

	147 DCHECK_EQ(open_quotes.size(), close_quotes.size());

	148 open_quotes_ = open_quotes;

	149 close_quotes_ = close_quotes;

	150 }

124	151

125 // Call this method to advance the tokenizer to the next delimiter. This	152 // Call this method to advance the tokenizer to the next delimiter. This

126 // returns false if the tokenizer is complete. This method must be called	153 // returns false if the tokenizer is complete. This method must be called

127 // before calling any of the token* methods.	154 // before calling any of the token* methods.

128 bool GetNext() {	155 bool GetNext() {

129 if (quotes_.empty() && options_ == 0)	156 if (open_quotes_.empty() && options_ == 0)

130 return QuickGetNext();	157 return QuickGetNext();

131 else	158 else

132 return FullGetNext();	159 return FullGetNext();

133 }	160 }

134	161

135 // Start iterating through tokens from the beginning of the string.	162 // Start iterating through tokens from the beginning of the string.

136 void Reset() {	163 void Reset() {

137 token_end_ = start_pos_;	164 token_end_ = start_pos_;

138 }	165 }

139	166

(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
203 }	230 }

204 while (token_end_ != end_ && AdvanceOne(&state, *token_end_))	231 while (token_end_ != end_ && AdvanceOne(&state, *token_end_))

205 ++token_end_;	232 ++token_end_;

206 return true;	233 return true;

207 }	234 }

208	235

209 bool IsDelim(char_type c) const {	236 bool IsDelim(char_type c) const {

210 return delims_.find(c) != str::npos;	237 return delims_.find(c) != str::npos;

211 }	238 }

212	239

213 bool IsQuote(char_type c) const {	240 bool IsQuote(char_type c, char_type* close_quote) const {

214 return quotes_.find(c) != str::npos;	241 size_t pos = open_quotes_.find(c);

	242 if (pos != str::npos)

	243 *close_quote = close_quotes_[pos];

	244 return pos != str::npos;

215 }	245 }

216	246

217 struct AdvanceState {	247 struct AdvanceState {

218 bool in_quote;	248 bool in_quote;

219 bool in_escape;	249 bool in_escape;

220 char_type quote_char;	250 char_type quote_char;

221 AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {}	251 AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {}

222 };	252 };

223	253

224 // Returns true if a delimiter was not hit.	254 // Returns true if a delimiter was not hit.

225 bool AdvanceOne(AdvanceState* state, char_type c) {	255 bool AdvanceOne(AdvanceState* state, char_type c) {

226 if (state->in_quote) {	256 if (state->in_quote) {

227 if (state->in_escape) {	257 if (state->in_escape) {

228 state->in_escape = false;	258 state->in_escape = false;

229 } else if (c == '\\') {	259 } else if (c == '\\') {

230 state->in_escape = true;	260 state->in_escape = true;

231 } else if (c == state->quote_char) {	261 } else if (c == state->quote_char) {

232 state->in_quote = false;	262 state->in_quote = false;

233 }	263 }

234 } else {	264 } else {

235 if (IsDelim(c))	265 if (IsDelim(c))

236 return false;	266 return false;

237 state->in_quote = IsQuote(state->quote_char = c);	267 state->in_quote = IsQuote(c, &state->quote_char);

238 }	268 }

239 return true;	269 return true;

240 }	270 }

241	271

242 const_iterator start_pos_;	272 const_iterator start_pos_;

243 const_iterator token_begin_;	273 const_iterator token_begin_;

244 const_iterator token_end_;	274 const_iterator token_end_;

245 const_iterator end_;	275 const_iterator end_;

246 str delims_;	276 str delims_;

247 str quotes_;	277 str open_quotes_;

	278 str close_quotes_;

248 int options_;	279 int options_;

249 bool token_is_delim_;	280 bool token_is_delim_;

250 };	281 };

251	282

252 typedef StringTokenizerT<std::string, std::string::const_iterator>	283 typedef StringTokenizerT<std::string, std::string::const_iterator>

253 StringTokenizer;	284 StringTokenizer;

254 typedef StringTokenizerT<std::wstring, std::wstring::const_iterator>	285 typedef StringTokenizerT<std::wstring, std::wstring::const_iterator>

255 WStringTokenizer;	286 WStringTokenizer;

256 typedef StringTokenizerT<std::string, const char*> CStringTokenizer;	287 typedef StringTokenizerT<std::string, const char*> CStringTokenizer;

257	288

258 } // namespace base	289 } // namespace base

259	290

260 #endif // BASE_STRINGS_STRING_TOKENIZER_H_	291 #endif // BASE_STRINGS_STRING_TOKENIZER_H_

OLD	NEW

« no previous file with comments | « no previous file | base/strings/string_tokenizer_unittest.cc » ('j') | no next file with comments »