Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(130)

Side by Side Diff: src/regexp/regexp-parser.h

Issue 2050343002: [regexp] Experimental support for regexp named captures (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Rebase Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/regexp-ast.h ('k') | src/regexp/regexp-parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_
6 #define V8_REGEXP_REGEXP_PARSER_H_ 6 #define V8_REGEXP_REGEXP_PARSER_H_
7 7
8 #include "src/objects.h" 8 #include "src/objects.h"
9 #include "src/regexp/regexp-ast.h" 9 #include "src/regexp/regexp-ast.h"
10 #include "src/zone.h" 10 #include "src/zone.h"
(...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after
215 POSITIVE_LOOKAROUND, 215 POSITIVE_LOOKAROUND,
216 NEGATIVE_LOOKAROUND, 216 NEGATIVE_LOOKAROUND,
217 GROUPING 217 GROUPING
218 }; 218 };
219 219
220 class RegExpParserState : public ZoneObject { 220 class RegExpParserState : public ZoneObject {
221 public: 221 public:
222 RegExpParserState(RegExpParserState* previous_state, 222 RegExpParserState(RegExpParserState* previous_state,
223 SubexpressionType group_type, 223 SubexpressionType group_type,
224 RegExpLookaround::Type lookaround_type, 224 RegExpLookaround::Type lookaround_type,
225 int disjunction_capture_index, bool ignore_case, 225 int disjunction_capture_index,
226 const ZoneVector<uc16>* capture_name, bool ignore_case,
226 bool unicode, Zone* zone) 227 bool unicode, Zone* zone)
227 : previous_state_(previous_state), 228 : previous_state_(previous_state),
228 builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)), 229 builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)),
229 group_type_(group_type), 230 group_type_(group_type),
230 lookaround_type_(lookaround_type), 231 lookaround_type_(lookaround_type),
231 disjunction_capture_index_(disjunction_capture_index) {} 232 disjunction_capture_index_(disjunction_capture_index),
233 capture_name_(capture_name) {}
232 // Parser state of containing expression, if any. 234 // Parser state of containing expression, if any.
233 RegExpParserState* previous_state() { return previous_state_; } 235 RegExpParserState* previous_state() { return previous_state_; }
234 bool IsSubexpression() { return previous_state_ != NULL; } 236 bool IsSubexpression() { return previous_state_ != NULL; }
235 // RegExpBuilder building this regexp's AST. 237 // RegExpBuilder building this regexp's AST.
236 RegExpBuilder* builder() { return builder_; } 238 RegExpBuilder* builder() { return builder_; }
237 // Type of regexp being parsed (parenthesized group or entire regexp). 239 // Type of regexp being parsed (parenthesized group or entire regexp).
238 SubexpressionType group_type() { return group_type_; } 240 SubexpressionType group_type() { return group_type_; }
239 // Lookahead or Lookbehind. 241 // Lookahead or Lookbehind.
240 RegExpLookaround::Type lookaround_type() { return lookaround_type_; } 242 RegExpLookaround::Type lookaround_type() { return lookaround_type_; }
241 // Index in captures array of first capture in this sub-expression, if any. 243 // Index in captures array of first capture in this sub-expression, if any.
242 // Also the capture index of this sub-expression itself, if group_type 244 // Also the capture index of this sub-expression itself, if group_type
243 // is CAPTURE. 245 // is CAPTURE.
244 int capture_index() { return disjunction_capture_index_; } 246 int capture_index() { return disjunction_capture_index_; }
247 // The name of the current sub-expression, if group_type is CAPTURE. Only
248 // used for named captures.
249 const ZoneVector<uc16>* capture_name() { return capture_name_; }
250
251 bool IsNamedCapture() const { return capture_name_ != nullptr; }
245 252
246 // Check whether the parser is inside a capture group with the given index. 253 // Check whether the parser is inside a capture group with the given index.
247 bool IsInsideCaptureGroup(int index); 254 bool IsInsideCaptureGroup(int index);
255 // Check whether the parser is inside a capture group with the given name.
256 bool IsInsideCaptureGroup(const ZoneVector<uc16>* name);
248 257
249 private: 258 private:
250 // Linked list implementation of stack of states. 259 // Linked list implementation of stack of states.
251 RegExpParserState* previous_state_; 260 RegExpParserState* previous_state_;
252 // Builder for the stored disjunction. 261 // Builder for the stored disjunction.
253 RegExpBuilder* builder_; 262 RegExpBuilder* builder_;
254 // Stored disjunction type (capture, look-ahead or grouping), if any. 263 // Stored disjunction type (capture, look-ahead or grouping), if any.
255 SubexpressionType group_type_; 264 SubexpressionType group_type_;
256 // Stored read direction. 265 // Stored read direction.
257 RegExpLookaround::Type lookaround_type_; 266 RegExpLookaround::Type lookaround_type_;
258 // Stored disjunction's capture index (if any). 267 // Stored disjunction's capture index (if any).
259 int disjunction_capture_index_; 268 int disjunction_capture_index_;
269 // Stored capture name (if any).
270 const ZoneVector<uc16>* capture_name_;
260 }; 271 };
261 272
262 // Return the 1-indexed RegExpCapture object, allocate if necessary. 273 // Return the 1-indexed RegExpCapture object, allocate if necessary.
263 RegExpCapture* GetCapture(int index); 274 RegExpCapture* GetCapture(int index);
264 275
276 // Creates a new named capture at the specified index. Must be called exactly
277 // once for each named capture. Fails if a capture with the same name is
278 // encountered.
279 bool CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, int index);
280
281 // Parses the name of a capture group (?<name>pattern). The name must adhere
282 // to IdentifierName in the ECMAScript standard.
283 const ZoneVector<uc16>* ParseCaptureGroupName();
284
285 bool ParseNamedBackReference(RegExpBuilder* builder,
286 RegExpParserState* state);
287
288 // After the initial parsing pass, patch corresponding RegExpCapture objects
289 // into all RegExpBackReferences. This is done after initial parsing in order
290 // to avoid complicating cases in which references come before the capture.
291 void PatchNamedBackReferences();
292
293 Handle<FixedArray> CreateCaptureNameMap();
294
265 Isolate* isolate() { return isolate_; } 295 Isolate* isolate() { return isolate_; }
266 Zone* zone() const { return zone_; } 296 Zone* zone() const { return zone_; }
267 297
268 uc32 current() { return current_; } 298 uc32 current() { return current_; }
269 bool has_more() { return has_more_; } 299 bool has_more() { return has_more_; }
270 bool has_next() { return next_pos_ < in()->length(); } 300 bool has_next() { return next_pos_ < in()->length(); }
271 uc32 Next(); 301 uc32 Next();
272 template <bool update_position> 302 template <bool update_position>
273 uc32 ReadNext(); 303 uc32 ReadNext();
274 FlatStringReader* in() { return in_; } 304 FlatStringReader* in() { return in_; }
275 void ScanForCaptures(); 305 void ScanForCaptures();
276 306
277 Isolate* isolate_; 307 Isolate* isolate_;
278 Zone* zone_; 308 Zone* zone_;
279 Handle<String>* error_; 309 Handle<String>* error_;
280 ZoneList<RegExpCapture*>* captures_; 310 ZoneList<RegExpCapture*>* captures_;
311 ZoneList<RegExpCapture*>* named_captures_;
312 ZoneList<RegExpBackReference*>* named_back_references_;
281 FlatStringReader* in_; 313 FlatStringReader* in_;
282 uc32 current_; 314 uc32 current_;
283 bool ignore_case_; 315 bool ignore_case_;
284 bool multiline_; 316 bool multiline_;
285 bool unicode_; 317 bool unicode_;
286 int next_pos_; 318 int next_pos_;
287 int captures_started_; 319 int captures_started_;
288 // The capture count is only valid after we have scanned for captures. 320 // The capture count is only valid after we have scanned for captures.
289 int capture_count_; 321 int capture_count_;
290 bool has_more_; 322 bool has_more_;
291 bool simple_; 323 bool simple_;
292 bool contains_anchor_; 324 bool contains_anchor_;
293 bool is_scanned_for_captures_; 325 bool is_scanned_for_captures_;
294 bool failed_; 326 bool failed_;
295 }; 327 };
296 328
297 } // namespace internal 329 } // namespace internal
298 } // namespace v8 330 } // namespace v8
299 331
300 #endif // V8_REGEXP_REGEXP_PARSER_H_ 332 #endif // V8_REGEXP_REGEXP_PARSER_H_
OLDNEW
« no previous file with comments | « src/regexp/regexp-ast.h ('k') | src/regexp/regexp-parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698