Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(338)

Side by Side Diff: src/regexp/jsregexp.h

Issue 1618753002: Revert of [regexp] implement character classes for unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/ostreams.cc ('k') | src/regexp/jsregexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_REGEXP_JSREGEXP_H_ 5 #ifndef V8_REGEXP_JSREGEXP_H_
6 #define V8_REGEXP_JSREGEXP_H_ 6 #define V8_REGEXP_JSREGEXP_H_
7 7
8 #include "src/allocation.h" 8 #include "src/allocation.h"
9 #include "src/assembler.h" 9 #include "src/assembler.h"
10 #include "src/regexp/regexp-ast.h" 10 #include "src/regexp/regexp-ast.h"
(...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after
258 258
259 // A mapping from integers, specified as ranges, to a set of integers. 259 // A mapping from integers, specified as ranges, to a set of integers.
260 // Used for mapping character ranges to choices. 260 // Used for mapping character ranges to choices.
261 class DispatchTable : public ZoneObject { 261 class DispatchTable : public ZoneObject {
262 public: 262 public:
263 explicit DispatchTable(Zone* zone) : tree_(zone) { } 263 explicit DispatchTable(Zone* zone) : tree_(zone) { }
264 264
265 class Entry { 265 class Entry {
266 public: 266 public:
267 Entry() : from_(0), to_(0), out_set_(NULL) { } 267 Entry() : from_(0), to_(0), out_set_(NULL) { }
268 Entry(uc32 from, uc32 to, OutSet* out_set) 268 Entry(uc16 from, uc16 to, OutSet* out_set)
269 : from_(from), to_(to), out_set_(out_set) {} 269 : from_(from), to_(to), out_set_(out_set) { }
270 uc32 from() { return from_; } 270 uc16 from() { return from_; }
271 uc32 to() { return to_; } 271 uc16 to() { return to_; }
272 void set_to(uc32 value) { to_ = value; } 272 void set_to(uc16 value) { to_ = value; }
273 void AddValue(int value, Zone* zone) { 273 void AddValue(int value, Zone* zone) {
274 out_set_ = out_set_->Extend(value, zone); 274 out_set_ = out_set_->Extend(value, zone);
275 } 275 }
276 OutSet* out_set() { return out_set_; } 276 OutSet* out_set() { return out_set_; }
277 private: 277 private:
278 uc32 from_; 278 uc16 from_;
279 uc32 to_; 279 uc16 to_;
280 OutSet* out_set_; 280 OutSet* out_set_;
281 }; 281 };
282 282
283 class Config { 283 class Config {
284 public: 284 public:
285 typedef uc32 Key; 285 typedef uc16 Key;
286 typedef Entry Value; 286 typedef Entry Value;
287 static const uc32 kNoKey; 287 static const uc16 kNoKey;
288 static const Entry NoValue() { return Value(); } 288 static const Entry NoValue() { return Value(); }
289 static inline int Compare(uc32 a, uc32 b) { 289 static inline int Compare(uc16 a, uc16 b) {
290 if (a == b) 290 if (a == b)
291 return 0; 291 return 0;
292 else if (a < b) 292 else if (a < b)
293 return -1; 293 return -1;
294 else 294 else
295 return 1; 295 return 1;
296 } 296 }
297 }; 297 };
298 298
299 void AddRange(CharacterRange range, int value, Zone* zone); 299 void AddRange(CharacterRange range, int value, Zone* zone);
300 OutSet* Get(uc32 value); 300 OutSet* Get(uc16 value);
301 void Dump(); 301 void Dump();
302 302
303 template <typename Callback> 303 template <typename Callback>
304 void ForEach(Callback* callback) { 304 void ForEach(Callback* callback) {
305 return tree()->ForEach(callback); 305 return tree()->ForEach(callback);
306 } 306 }
307 307
308 private: 308 private:
309 // There can't be a static empty set since it allocates its 309 // There can't be a static empty set since it allocates its
310 // successors in a zone and caches them. 310 // successors in a zone and caches them.
311 OutSet* empty() { return &empty_; } 311 OutSet* empty() { return &empty_; }
312 OutSet empty_; 312 OutSet empty_;
313 ZoneSplayTree<Config>* tree() { return &tree_; } 313 ZoneSplayTree<Config>* tree() { return &tree_; }
314 ZoneSplayTree<Config> tree_; 314 ZoneSplayTree<Config> tree_;
315 }; 315 };
316 316
317 317
318 // Categorizes character ranges into BMP, non-BMP, lead, and trail surrogates.
319 class UnicodeRangeSplitter {
320 public:
321 UnicodeRangeSplitter(Zone* zone, ZoneList<CharacterRange>* base);
322 void Call(uc32 from, DispatchTable::Entry entry);
323
324 ZoneList<CharacterRange>* bmp() { return bmp_; }
325 ZoneList<CharacterRange>* lead_surrogates() { return lead_surrogates_; }
326 ZoneList<CharacterRange>* trail_surrogates() { return trail_surrogates_; }
327 ZoneList<CharacterRange>* non_bmp() const { return non_bmp_; }
328
329 private:
330 static const int kBase = 0;
331 // Separate ranges into
332 static const int kBmpCodePoints = 1;
333 static const int kLeadSurrogates = 2;
334 static const int kTrailSurrogates = 3;
335 static const int kNonBmpCodePoints = 4;
336
337 Zone* zone_;
338 DispatchTable table_;
339 ZoneList<CharacterRange>* bmp_;
340 ZoneList<CharacterRange>* lead_surrogates_;
341 ZoneList<CharacterRange>* trail_surrogates_;
342 ZoneList<CharacterRange>* non_bmp_;
343 };
344
345
346 #define FOR_EACH_NODE_TYPE(VISIT) \ 318 #define FOR_EACH_NODE_TYPE(VISIT) \
347 VISIT(End) \ 319 VISIT(End) \
348 VISIT(Action) \ 320 VISIT(Action) \
349 VISIT(Choice) \ 321 VISIT(Choice) \
350 VISIT(BackReference) \ 322 VISIT(BackReference) \
351 VISIT(Assertion) \ 323 VISIT(Assertion) \
352 VISIT(Text) 324 VISIT(Text)
353 325
354 326
355 class Trace; 327 class Trace;
(...skipping 355 matching lines...) Expand 10 before | Expand all | Expand 10 after
711 TextNode(ZoneList<TextElement>* elms, bool read_backward, 683 TextNode(ZoneList<TextElement>* elms, bool read_backward,
712 RegExpNode* on_success) 684 RegExpNode* on_success)
713 : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {} 685 : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {}
714 TextNode(RegExpCharacterClass* that, bool read_backward, 686 TextNode(RegExpCharacterClass* that, bool read_backward,
715 RegExpNode* on_success) 687 RegExpNode* on_success)
716 : SeqRegExpNode(on_success), 688 : SeqRegExpNode(on_success),
717 elms_(new (zone()) ZoneList<TextElement>(1, zone())), 689 elms_(new (zone()) ZoneList<TextElement>(1, zone())),
718 read_backward_(read_backward) { 690 read_backward_(read_backward) {
719 elms_->Add(TextElement::CharClass(that), zone()); 691 elms_->Add(TextElement::CharClass(that), zone());
720 } 692 }
721 // Create TextNode for a single character class for the given ranges.
722 static TextNode* CreateForCharacterRanges(Zone* zone,
723 ZoneList<CharacterRange>* ranges,
724 bool read_backward,
725 RegExpNode* on_success);
726 // Create TextNode for a surrogate pair with a range given for the
727 // lead and the trail surrogate each.
728 static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead,
729 CharacterRange trail,
730 bool read_backward,
731 RegExpNode* on_success);
732 virtual void Accept(NodeVisitor* visitor); 693 virtual void Accept(NodeVisitor* visitor);
733 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 694 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
734 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); 695 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
735 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 696 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
736 RegExpCompiler* compiler, 697 RegExpCompiler* compiler,
737 int characters_filled_in, 698 int characters_filled_in,
738 bool not_at_start); 699 bool not_at_start);
739 ZoneList<TextElement>* elements() { return elms_; } 700 ZoneList<TextElement>* elements() { return elms_; }
740 bool read_backward() { return read_backward_; } 701 bool read_backward() { return read_backward_; }
741 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); 702 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte);
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
845 private: 806 private:
846 int start_reg_; 807 int start_reg_;
847 int end_reg_; 808 int end_reg_;
848 bool read_backward_; 809 bool read_backward_;
849 }; 810 };
850 811
851 812
852 class EndNode: public RegExpNode { 813 class EndNode: public RegExpNode {
853 public: 814 public:
854 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; 815 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
855 EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {} 816 explicit EndNode(Action action, Zone* zone)
817 : RegExpNode(zone), action_(action) { }
856 virtual void Accept(NodeVisitor* visitor); 818 virtual void Accept(NodeVisitor* visitor);
857 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 819 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
858 virtual int EatsAtLeast(int still_to_find, 820 virtual int EatsAtLeast(int still_to_find,
859 int recursion_depth, 821 int recursion_depth,
860 bool not_at_start) { return 0; } 822 bool not_at_start) { return 0; }
861 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 823 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
862 RegExpCompiler* compiler, 824 RegExpCompiler* compiler,
863 int characters_filled_in, 825 int characters_filled_in,
864 bool not_at_start) { 826 bool not_at_start) {
865 // Returning 0 from EatsAtLeast should ensure we never get here. 827 // Returning 0 from EatsAtLeast should ensure we never get here.
(...skipping 670 matching lines...) Expand 10 before | Expand all | Expand 10 after
1536 code(isolate->heap()->the_hole_value()), 1498 code(isolate->heap()->the_hole_value()),
1537 num_registers(0) {} 1499 num_registers(0) {}
1538 CompilationResult(Object* code, int registers) 1500 CompilationResult(Object* code, int registers)
1539 : error_message(NULL), code(code), num_registers(registers) {} 1501 : error_message(NULL), code(code), num_registers(registers) {}
1540 const char* error_message; 1502 const char* error_message;
1541 Object* code; 1503 Object* code;
1542 int num_registers; 1504 int num_registers;
1543 }; 1505 };
1544 1506
1545 static CompilationResult Compile(Isolate* isolate, Zone* zone, 1507 static CompilationResult Compile(Isolate* isolate, Zone* zone,
1546 RegExpCompileData* input, 1508 RegExpCompileData* input, bool ignore_case,
1547 JSRegExp::Flags flags, 1509 bool global, bool multiline, bool sticky,
1548 Handle<String> pattern, 1510 Handle<String> pattern,
1549 Handle<String> sample_subject, 1511 Handle<String> sample_subject,
1550 bool is_one_byte); 1512 bool is_one_byte);
1551 1513
1552 static bool TooMuchRegExpCode(Handle<String> pattern); 1514 static bool TooMuchRegExpCode(Handle<String> pattern);
1553 1515
1554 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); 1516 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
1555 }; 1517 };
1556 1518
1557 1519
(...skipping 18 matching lines...) Expand all
1576 static const int kStringOffset = 0; 1538 static const int kStringOffset = 0;
1577 static const int kPatternOffset = 1; 1539 static const int kPatternOffset = 1;
1578 static const int kArrayOffset = 2; 1540 static const int kArrayOffset = 2;
1579 static const int kLastMatchOffset = 3; 1541 static const int kLastMatchOffset = 3;
1580 }; 1542 };
1581 1543
1582 } // namespace internal 1544 } // namespace internal
1583 } // namespace v8 1545 } // namespace v8
1584 1546
1585 #endif // V8_REGEXP_JSREGEXP_H_ 1547 #endif // V8_REGEXP_JSREGEXP_H_
OLDNEW
« no previous file with comments | « src/ostreams.cc ('k') | src/regexp/jsregexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698