Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: src/regexp/jsregexp.h

Issue 1565183002: [regexp] move regexp parser into own files. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: fix test compile Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/parsing/scanner.h ('k') | src/regexp/jsregexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_REGEXP_JSREGEXP_H_ 5 #ifndef V8_REGEXP_JSREGEXP_H_
6 #define V8_REGEXP_JSREGEXP_H_ 6 #define V8_REGEXP_JSREGEXP_H_
7 7
8 #include "src/allocation.h" 8 #include "src/allocation.h"
9 #include "src/assembler.h" 9 #include "src/assembler.h"
10 #include "src/regexp/regexp-ast.h"
10 11
11 namespace v8 { 12 namespace v8 {
12 namespace internal { 13 namespace internal {
13 14
14 class NodeVisitor; 15 class NodeVisitor;
15 class RegExpCompiler; 16 class RegExpCompiler;
16 class RegExpMacroAssembler; 17 class RegExpMacroAssembler;
17 class RegExpNode; 18 class RegExpNode;
18 class RegExpTree; 19 class RegExpTree;
19 class BoyerMooreLookahead; 20 class BoyerMooreLookahead;
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
219 // Represents the location of one element relative to the intersection of 220 // Represents the location of one element relative to the intersection of
220 // two sets. Corresponds to the four areas of a Venn diagram. 221 // two sets. Corresponds to the four areas of a Venn diagram.
221 enum ElementInSetsRelation { 222 enum ElementInSetsRelation {
222 kInsideNone = 0, 223 kInsideNone = 0,
223 kInsideFirst = 1, 224 kInsideFirst = 1,
224 kInsideSecond = 2, 225 kInsideSecond = 2,
225 kInsideBoth = 3 226 kInsideBoth = 3
226 }; 227 };
227 228
228 229
229 // Represents code units in the range from from_ to to_, both ends are
230 // inclusive.
231 class CharacterRange {
232 public:
233 CharacterRange() : from_(0), to_(0) { }
234 // For compatibility with the CHECK_OK macro
235 CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT
236 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { }
237 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
238 Zone* zone);
239 static Vector<const int> GetWordBounds();
240 static inline CharacterRange Singleton(uc16 value) {
241 return CharacterRange(value, value);
242 }
243 static inline CharacterRange Range(uc16 from, uc16 to) {
244 DCHECK(from <= to);
245 return CharacterRange(from, to);
246 }
247 static inline CharacterRange Everything() {
248 return CharacterRange(0, 0xFFFF);
249 }
250 bool Contains(uc16 i) { return from_ <= i && i <= to_; }
251 uc16 from() const { return from_; }
252 void set_from(uc16 value) { from_ = value; }
253 uc16 to() const { return to_; }
254 void set_to(uc16 value) { to_ = value; }
255 bool is_valid() { return from_ <= to_; }
256 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
257 bool IsSingleton() { return (from_ == to_); }
258 void AddCaseEquivalents(Isolate* isolate, Zone* zone,
259 ZoneList<CharacterRange>* ranges, bool is_one_byte);
260 static void Split(ZoneList<CharacterRange>* base,
261 Vector<const int> overlay,
262 ZoneList<CharacterRange>** included,
263 ZoneList<CharacterRange>** excluded,
264 Zone* zone);
265 // Whether a range list is in canonical form: Ranges ordered by from value,
266 // and ranges non-overlapping and non-adjacent.
267 static bool IsCanonical(ZoneList<CharacterRange>* ranges);
268 // Convert range list to canonical form. The characters covered by the ranges
269 // will still be the same, but no character is in more than one range, and
270 // adjacent ranges are merged. The resulting list may be shorter than the
271 // original, but cannot be longer.
272 static void Canonicalize(ZoneList<CharacterRange>* ranges);
273 // Negate the contents of a character range in canonical form.
274 static void Negate(ZoneList<CharacterRange>* src,
275 ZoneList<CharacterRange>* dst,
276 Zone* zone);
277 static const int kStartMarker = (1 << 24);
278 static const int kPayloadMask = (1 << 24) - 1;
279
280 private:
281 uc16 from_;
282 uc16 to_;
283 };
284
285
286 // A set of unsigned integers that behaves especially well on small 230 // A set of unsigned integers that behaves especially well on small
287 // integers (< 32). May do zone-allocation. 231 // integers (< 32). May do zone-allocation.
288 class OutSet: public ZoneObject { 232 class OutSet: public ZoneObject {
289 public: 233 public:
290 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } 234 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
291 OutSet* Extend(unsigned value, Zone* zone); 235 OutSet* Extend(unsigned value, Zone* zone);
292 bool Get(unsigned value) const; 236 bool Get(unsigned value) const;
293 static const unsigned kFirstLimit = 32; 237 static const unsigned kFirstLimit = 32;
294 238
295 private: 239 private:
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
373 317
374 #define FOR_EACH_NODE_TYPE(VISIT) \ 318 #define FOR_EACH_NODE_TYPE(VISIT) \
375 VISIT(End) \ 319 VISIT(End) \
376 VISIT(Action) \ 320 VISIT(Action) \
377 VISIT(Choice) \ 321 VISIT(Choice) \
378 VISIT(BackReference) \ 322 VISIT(BackReference) \
379 VISIT(Assertion) \ 323 VISIT(Assertion) \
380 VISIT(Text) 324 VISIT(Text)
381 325
382 326
383 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
384 VISIT(Disjunction) \
385 VISIT(Alternative) \
386 VISIT(Assertion) \
387 VISIT(CharacterClass) \
388 VISIT(Atom) \
389 VISIT(Quantifier) \
390 VISIT(Capture) \
391 VISIT(Lookaround) \
392 VISIT(BackReference) \
393 VISIT(Empty) \
394 VISIT(Text)
395
396
397 #define FORWARD_DECLARE(Name) class RegExp##Name;
398 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
399 #undef FORWARD_DECLARE
400
401
402 class TextElement final BASE_EMBEDDED {
403 public:
404 enum TextType {
405 ATOM,
406 CHAR_CLASS
407 };
408
409 static TextElement Atom(RegExpAtom* atom);
410 static TextElement CharClass(RegExpCharacterClass* char_class);
411
412 int cp_offset() const { return cp_offset_; }
413 void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
414 int length() const;
415
416 TextType text_type() const { return text_type_; }
417
418 RegExpTree* tree() const { return tree_; }
419
420 RegExpAtom* atom() const {
421 DCHECK(text_type() == ATOM);
422 return reinterpret_cast<RegExpAtom*>(tree());
423 }
424
425 RegExpCharacterClass* char_class() const {
426 DCHECK(text_type() == CHAR_CLASS);
427 return reinterpret_cast<RegExpCharacterClass*>(tree());
428 }
429
430 private:
431 TextElement(TextType text_type, RegExpTree* tree)
432 : cp_offset_(-1), text_type_(text_type), tree_(tree) {}
433
434 int cp_offset_;
435 TextType text_type_;
436 RegExpTree* tree_;
437 };
438
439
440 class Trace; 327 class Trace;
441 struct PreloadState; 328 struct PreloadState;
442 class GreedyLoopState; 329 class GreedyLoopState;
443 class AlternativeGenerationList; 330 class AlternativeGenerationList;
444 331
445 struct NodeInfo { 332 struct NodeInfo {
446 NodeInfo() 333 NodeInfo()
447 : being_analyzed(false), 334 : being_analyzed(false),
448 been_analyzed(false), 335 been_analyzed(false),
449 follows_word_interest(false), 336 follows_word_interest(false),
(...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after
681 // generated code is located unless the code is generated at the start of 568 // generated code is located unless the code is generated at the start of
682 // a trace, in which case it is generic and can be reused by flushing the 569 // a trace, in which case it is generic and can be reused by flushing the
683 // deferred operations in the current trace and generating a goto. 570 // deferred operations in the current trace and generating a goto.
684 int trace_count_; 571 int trace_count_;
685 BoyerMooreLookahead* bm_info_[2]; 572 BoyerMooreLookahead* bm_info_[2];
686 573
687 Zone* zone_; 574 Zone* zone_;
688 }; 575 };
689 576
690 577
691 // A simple closed interval.
692 class Interval {
693 public:
694 Interval() : from_(kNone), to_(kNone) { }
695 Interval(int from, int to) : from_(from), to_(to) { }
696 Interval Union(Interval that) {
697 if (that.from_ == kNone)
698 return *this;
699 else if (from_ == kNone)
700 return that;
701 else
702 return Interval(Min(from_, that.from_), Max(to_, that.to_));
703 }
704 bool Contains(int value) {
705 return (from_ <= value) && (value <= to_);
706 }
707 bool is_empty() { return from_ == kNone; }
708 int from() const { return from_; }
709 int to() const { return to_; }
710 static Interval Empty() { return Interval(); }
711 static const int kNone = -1;
712 private:
713 int from_;
714 int to_;
715 };
716
717
718 class SeqRegExpNode: public RegExpNode { 578 class SeqRegExpNode: public RegExpNode {
719 public: 579 public:
720 explicit SeqRegExpNode(RegExpNode* on_success) 580 explicit SeqRegExpNode(RegExpNode* on_success)
721 : RegExpNode(on_success->zone()), on_success_(on_success) { } 581 : RegExpNode(on_success->zone()), on_success_(on_success) { }
722 RegExpNode* on_success() { return on_success_; } 582 RegExpNode* on_success() { return on_success_; }
723 void set_on_success(RegExpNode* node) { on_success_ = node; } 583 void set_on_success(RegExpNode* node) { on_success_ = node; }
724 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); 584 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
725 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, 585 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
726 BoyerMooreLookahead* bm, bool not_at_start) { 586 BoyerMooreLookahead* bm, bool not_at_start) {
727 on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); 587 on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
(...skipping 950 matching lines...) Expand 10 before | Expand all | Expand 10 after
1678 static const int kStringOffset = 0; 1538 static const int kStringOffset = 0;
1679 static const int kPatternOffset = 1; 1539 static const int kPatternOffset = 1;
1680 static const int kArrayOffset = 2; 1540 static const int kArrayOffset = 2;
1681 static const int kLastMatchOffset = 3; 1541 static const int kLastMatchOffset = 3;
1682 }; 1542 };
1683 1543
1684 } // namespace internal 1544 } // namespace internal
1685 } // namespace v8 1545 } // namespace v8
1686 1546
1687 #endif // V8_REGEXP_JSREGEXP_H_ 1547 #endif // V8_REGEXP_JSREGEXP_H_
OLDNEW
« no previous file with comments | « src/parsing/scanner.h ('k') | src/regexp/jsregexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698