Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(144)

Side by Side Diff: src/regexp/jsregexp.h

Issue 1578253005: [regexp] implement character classes for unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: more tests Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/ostreams.cc ('k') | src/regexp/jsregexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_REGEXP_JSREGEXP_H_ 5 #ifndef V8_REGEXP_JSREGEXP_H_
6 #define V8_REGEXP_JSREGEXP_H_ 6 #define V8_REGEXP_JSREGEXP_H_
7 7
8 #include "src/allocation.h" 8 #include "src/allocation.h"
9 #include "src/assembler.h" 9 #include "src/assembler.h"
10 #include "src/regexp/regexp-ast.h" 10 #include "src/regexp/regexp-ast.h"
(...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after
258 258
259 // A mapping from integers, specified as ranges, to a set of integers. 259 // A mapping from integers, specified as ranges, to a set of integers.
260 // Used for mapping character ranges to choices. 260 // Used for mapping character ranges to choices.
261 class DispatchTable : public ZoneObject { 261 class DispatchTable : public ZoneObject {
262 public: 262 public:
263 explicit DispatchTable(Zone* zone) : tree_(zone) { } 263 explicit DispatchTable(Zone* zone) : tree_(zone) { }
264 264
265 class Entry { 265 class Entry {
266 public: 266 public:
267 Entry() : from_(0), to_(0), out_set_(NULL) { } 267 Entry() : from_(0), to_(0), out_set_(NULL) { }
268 Entry(uc16 from, uc16 to, OutSet* out_set) 268 Entry(uc32 from, uc32 to, OutSet* out_set)
269 : from_(from), to_(to), out_set_(out_set) { } 269 : from_(from), to_(to), out_set_(out_set) {}
270 uc16 from() { return from_; } 270 uc32 from() { return from_; }
271 uc16 to() { return to_; } 271 uc32 to() { return to_; }
272 void set_to(uc16 value) { to_ = value; } 272 void set_to(uc32 value) { to_ = value; }
273 void AddValue(int value, Zone* zone) { 273 void AddValue(int value, Zone* zone) {
274 out_set_ = out_set_->Extend(value, zone); 274 out_set_ = out_set_->Extend(value, zone);
275 } 275 }
276 OutSet* out_set() { return out_set_; } 276 OutSet* out_set() { return out_set_; }
277 private: 277 private:
278 uc16 from_; 278 uc32 from_;
279 uc16 to_; 279 uc32 to_;
280 OutSet* out_set_; 280 OutSet* out_set_;
281 }; 281 };
282 282
283 class Config { 283 class Config {
284 public: 284 public:
285 typedef uc16 Key; 285 typedef uc32 Key;
286 typedef Entry Value; 286 typedef Entry Value;
287 static const uc16 kNoKey; 287 static const uc32 kNoKey;
288 static const Entry NoValue() { return Value(); } 288 static const Entry NoValue() { return Value(); }
289 static inline int Compare(uc16 a, uc16 b) { 289 static inline int Compare(uc32 a, uc32 b) {
290 if (a == b) 290 if (a == b)
291 return 0; 291 return 0;
292 else if (a < b) 292 else if (a < b)
293 return -1; 293 return -1;
294 else 294 else
295 return 1; 295 return 1;
296 } 296 }
297 }; 297 };
298 298
299 void AddRange(CharacterRange range, int value, Zone* zone); 299 void AddRange(CharacterRange range, int value, Zone* zone);
300 OutSet* Get(uc16 value); 300 OutSet* Get(uc32 value);
301 void Dump(); 301 void Dump();
302 302
303 template <typename Callback> 303 template <typename Callback>
304 void ForEach(Callback* callback) { 304 void ForEach(Callback* callback) {
305 return tree()->ForEach(callback); 305 return tree()->ForEach(callback);
306 } 306 }
307 307
308 private: 308 private:
309 // There can't be a static empty set since it allocates its 309 // There can't be a static empty set since it allocates its
310 // successors in a zone and caches them. 310 // successors in a zone and caches them.
311 OutSet* empty() { return &empty_; } 311 OutSet* empty() { return &empty_; }
312 OutSet empty_; 312 OutSet empty_;
313 ZoneSplayTree<Config>* tree() { return &tree_; } 313 ZoneSplayTree<Config>* tree() { return &tree_; }
314 ZoneSplayTree<Config> tree_; 314 ZoneSplayTree<Config> tree_;
315 }; 315 };
316 316
317 317
318 // Categorizes character ranges into BMP, non-BMP, lead, and trail surrogates.
319 class UnicodeRangeSplitter {
320 public:
321 UnicodeRangeSplitter(Zone* zone, ZoneList<CharacterRange>* base);
322 void Call(uc32 from, DispatchTable::Entry entry);
323
324 ZoneList<CharacterRange>* bmp() { return bmp_; }
325 ZoneList<CharacterRange>* lead_surrogates() { return lead_surrogates_; }
326 ZoneList<CharacterRange>* trail_surrogates() { return trail_surrogates_; }
327 ZoneList<CharacterRange>* non_bmp() const { return non_bmp_; }
328
329 private:
330 static const int kBase = 0;
331 // Separate ranges into
332 static const int kBmpCodePoints = 1;
333 static const int kLeadSurrogates = 2;
334 static const int kTrailSurrogates = 3;
335 static const int kNonBmpCodePoints = 4;
336
337 Zone* zone_;
338 DispatchTable table_;
339 ZoneList<CharacterRange>* bmp_;
340 ZoneList<CharacterRange>* lead_surrogates_;
341 ZoneList<CharacterRange>* trail_surrogates_;
342 ZoneList<CharacterRange>* non_bmp_;
343 };
344
345
318 #define FOR_EACH_NODE_TYPE(VISIT) \ 346 #define FOR_EACH_NODE_TYPE(VISIT) \
319 VISIT(End) \ 347 VISIT(End) \
320 VISIT(Action) \ 348 VISIT(Action) \
321 VISIT(Choice) \ 349 VISIT(Choice) \
322 VISIT(BackReference) \ 350 VISIT(BackReference) \
323 VISIT(Assertion) \ 351 VISIT(Assertion) \
324 VISIT(Text) 352 VISIT(Text)
325 353
326 354
327 class Trace; 355 class Trace;
(...skipping 355 matching lines...) Expand 10 before | Expand all | Expand 10 after
683 TextNode(ZoneList<TextElement>* elms, bool read_backward, 711 TextNode(ZoneList<TextElement>* elms, bool read_backward,
684 RegExpNode* on_success) 712 RegExpNode* on_success)
685 : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {} 713 : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {}
686 TextNode(RegExpCharacterClass* that, bool read_backward, 714 TextNode(RegExpCharacterClass* that, bool read_backward,
687 RegExpNode* on_success) 715 RegExpNode* on_success)
688 : SeqRegExpNode(on_success), 716 : SeqRegExpNode(on_success),
689 elms_(new (zone()) ZoneList<TextElement>(1, zone())), 717 elms_(new (zone()) ZoneList<TextElement>(1, zone())),
690 read_backward_(read_backward) { 718 read_backward_(read_backward) {
691 elms_->Add(TextElement::CharClass(that), zone()); 719 elms_->Add(TextElement::CharClass(that), zone());
692 } 720 }
721 // Create TextNode for a single character class for the given ranges.
722 static TextNode* CreateForCharacterRanges(Zone* zone,
723 ZoneList<CharacterRange>* ranges,
724 bool read_backward,
725 RegExpNode* on_success);
726 // Create TextNode for a surrogate pair with a range given for the
727 // lead and the trail surrogate each.
728 static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead,
729 CharacterRange trail,
730 bool read_backward,
731 RegExpNode* on_success);
693 virtual void Accept(NodeVisitor* visitor); 732 virtual void Accept(NodeVisitor* visitor);
694 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 733 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
695 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); 734 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
696 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 735 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
697 RegExpCompiler* compiler, 736 RegExpCompiler* compiler,
698 int characters_filled_in, 737 int characters_filled_in,
699 bool not_at_start); 738 bool not_at_start);
700 ZoneList<TextElement>* elements() { return elms_; } 739 ZoneList<TextElement>* elements() { return elms_; }
701 bool read_backward() { return read_backward_; } 740 bool read_backward() { return read_backward_; }
702 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); 741 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte);
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
806 private: 845 private:
807 int start_reg_; 846 int start_reg_;
808 int end_reg_; 847 int end_reg_;
809 bool read_backward_; 848 bool read_backward_;
810 }; 849 };
811 850
812 851
813 class EndNode: public RegExpNode { 852 class EndNode: public RegExpNode {
814 public: 853 public:
815 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; 854 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
816 explicit EndNode(Action action, Zone* zone) 855 EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {}
817 : RegExpNode(zone), action_(action) { }
818 virtual void Accept(NodeVisitor* visitor); 856 virtual void Accept(NodeVisitor* visitor);
819 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 857 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
820 virtual int EatsAtLeast(int still_to_find, 858 virtual int EatsAtLeast(int still_to_find,
821 int recursion_depth, 859 int recursion_depth,
822 bool not_at_start) { return 0; } 860 bool not_at_start) { return 0; }
823 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 861 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
824 RegExpCompiler* compiler, 862 RegExpCompiler* compiler,
825 int characters_filled_in, 863 int characters_filled_in,
826 bool not_at_start) { 864 bool not_at_start) {
827 // Returning 0 from EatsAtLeast should ensure we never get here. 865 // Returning 0 from EatsAtLeast should ensure we never get here.
(...skipping 670 matching lines...) Expand 10 before | Expand all | Expand 10 after
1498 code(isolate->heap()->the_hole_value()), 1536 code(isolate->heap()->the_hole_value()),
1499 num_registers(0) {} 1537 num_registers(0) {}
1500 CompilationResult(Object* code, int registers) 1538 CompilationResult(Object* code, int registers)
1501 : error_message(NULL), code(code), num_registers(registers) {} 1539 : error_message(NULL), code(code), num_registers(registers) {}
1502 const char* error_message; 1540 const char* error_message;
1503 Object* code; 1541 Object* code;
1504 int num_registers; 1542 int num_registers;
1505 }; 1543 };
1506 1544
1507 static CompilationResult Compile(Isolate* isolate, Zone* zone, 1545 static CompilationResult Compile(Isolate* isolate, Zone* zone,
1508 RegExpCompileData* input, bool ignore_case, 1546 RegExpCompileData* input,
1509 bool global, bool multiline, bool sticky, 1547 JSRegExp::Flags flags,
1510 Handle<String> pattern, 1548 Handle<String> pattern,
1511 Handle<String> sample_subject, 1549 Handle<String> sample_subject,
1512 bool is_one_byte); 1550 bool is_one_byte);
1513 1551
1514 static bool TooMuchRegExpCode(Handle<String> pattern); 1552 static bool TooMuchRegExpCode(Handle<String> pattern);
1515 1553
1516 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); 1554 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
1517 }; 1555 };
1518 1556
1519 1557
(...skipping 18 matching lines...) Expand all
1538 static const int kStringOffset = 0; 1576 static const int kStringOffset = 0;
1539 static const int kPatternOffset = 1; 1577 static const int kPatternOffset = 1;
1540 static const int kArrayOffset = 2; 1578 static const int kArrayOffset = 2;
1541 static const int kLastMatchOffset = 3; 1579 static const int kLastMatchOffset = 3;
1542 }; 1580 };
1543 1581
1544 } // namespace internal 1582 } // namespace internal
1545 } // namespace v8 1583 } // namespace v8
1546 1584
1547 #endif // V8_REGEXP_JSREGEXP_H_ 1585 #endif // V8_REGEXP_JSREGEXP_H_
OLDNEW
« no previous file with comments | « src/ostreams.cc ('k') | src/regexp/jsregexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698