Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(189)

Side by Side Diff: src/regexp/regexp-ast.h

Issue 1599303002: [regexp] implement case-insensitive unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@unicodeclass
Patch Set: fixes Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_REGEXP_REGEXP_AST_H_ 5 #ifndef V8_REGEXP_REGEXP_AST_H_
6 #define V8_REGEXP_REGEXP_AST_H_ 6 #define V8_REGEXP_REGEXP_AST_H_
7 7
8 #include "src/objects.h" 8 #include "src/objects.h"
9 #include "src/utils.h" 9 #include "src/utils.h"
10 #include "src/zone.h" 10 #include "src/zone.h"
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
101 return list; 101 return list;
102 } 102 }
103 bool Contains(uc32 i) { return from_ <= i && i <= to_; } 103 bool Contains(uc32 i) { return from_ <= i && i <= to_; }
104 uc32 from() const { return from_; } 104 uc32 from() const { return from_; }
105 void set_from(uc32 value) { from_ = value; } 105 void set_from(uc32 value) { from_ = value; }
106 uc32 to() const { return to_; } 106 uc32 to() const { return to_; }
107 void set_to(uc32 value) { to_ = value; } 107 void set_to(uc32 value) { to_ = value; }
108 bool is_valid() { return from_ <= to_; } 108 bool is_valid() { return from_ <= to_; }
109 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } 109 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
110 bool IsSingleton() { return (from_ == to_); } 110 bool IsSingleton() { return (from_ == to_); }
111 void AddCaseEquivalents(Isolate* isolate, Zone* zone, 111 static void AddCaseEquivalents(Isolate* isolate, Zone* zone,
112 ZoneList<CharacterRange>* ranges, bool is_one_byte); 112 ZoneList<CharacterRange>* ranges,
113 bool is_one_byte);
113 // Whether a range list is in canonical form: Ranges ordered by from value, 114 // Whether a range list is in canonical form: Ranges ordered by from value,
114 // and ranges non-overlapping and non-adjacent. 115 // and ranges non-overlapping and non-adjacent.
115 static bool IsCanonical(ZoneList<CharacterRange>* ranges); 116 static bool IsCanonical(ZoneList<CharacterRange>* ranges);
116 // Convert range list to canonical form. The characters covered by the ranges 117 // Convert range list to canonical form. The characters covered by the ranges
117 // will still be the same, but no character is in more than one range, and 118 // will still be the same, but no character is in more than one range, and
118 // adjacent ranges are merged. The resulting list may be shorter than the 119 // adjacent ranges are merged. The resulting list may be shorter than the
119 // original, but cannot be longer. 120 // original, but cannot be longer.
120 static void Canonicalize(ZoneList<CharacterRange>* ranges); 121 static void Canonicalize(ZoneList<CharacterRange>* ranges);
121 // Negate the contents of a character range in canonical form. 122 // Negate the contents of a character range in canonical form.
122 static void Negate(ZoneList<CharacterRange>* src, 123 static void Negate(ZoneList<CharacterRange>* src,
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after
286 class RegExpCharacterClass final : public RegExpTree { 287 class RegExpCharacterClass final : public RegExpTree {
287 public: 288 public:
288 RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated) 289 RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
289 : set_(ranges), is_negated_(is_negated) {} 290 : set_(ranges), is_negated_(is_negated) {}
290 explicit RegExpCharacterClass(uc16 type) : set_(type), is_negated_(false) {} 291 explicit RegExpCharacterClass(uc16 type) : set_(type), is_negated_(false) {}
291 void* Accept(RegExpVisitor* visitor, void* data) override; 292 void* Accept(RegExpVisitor* visitor, void* data) override;
292 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 293 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
293 RegExpCharacterClass* AsCharacterClass() override; 294 RegExpCharacterClass* AsCharacterClass() override;
294 bool IsCharacterClass() override; 295 bool IsCharacterClass() override;
295 bool IsTextElement() override { return true; } 296 bool IsTextElement() override { return true; }
296 bool NeedsDesugaringForUnicode(Zone* zone);
297 int min_match() override { return 1; } 297 int min_match() override { return 1; }
298 int max_match() override { return 1; } 298 int max_match() override { return 1; }
299 void AppendToText(RegExpText* text, Zone* zone) override; 299 void AppendToText(RegExpText* text, Zone* zone) override;
300 CharacterSet character_set() { return set_; } 300 CharacterSet character_set() { return set_; }
301 // TODO(lrn): Remove need for complex version of is_standard that 301 // TODO(lrn): Remove need for complex version of is_standard that
302 // recognizes a mangled standard set and just do { return set_.is_special(); } 302 // recognizes a mangled standard set and just do { return set_.is_special(); }
303 bool is_standard(Zone* zone); 303 bool is_standard(Zone* zone);
304 // Returns a value representing the standard character set if is_standard() 304 // Returns a value representing the standard character set if is_standard()
305 // returns true. 305 // returns true.
306 // Currently used values are: 306 // Currently used values are:
307 // s : unicode whitespace 307 // s : unicode whitespace
308 // S : unicode non-whitespace 308 // S : unicode non-whitespace
309 // w : ASCII word character (digit, letter, underscore) 309 // w : ASCII word character (digit, letter, underscore)
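310 // W : any character that is not an ASCII word character 310 // W : any character that is not an ASCII word character
311 // d : ASCII digit 311 // d : ASCII digit
312 // D : any character that is not an ASCII digit 312 // D : any character that is not an ASCII digit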
313 // . : non-unicode non-newline 313 // . : non-newline
314 // * : All characters, for advancing unanchored regexp 314 // * : All characters, for advancing unanchored regexp
315 uc16 standard_type() { return set_.standard_set_type(); } 315 uc16 standard_type() { return set_.standard_set_type(); }
316 ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); } 316 ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
317 bool is_negated() { return is_negated_; } 317 bool is_negated() { return is_negated_; }
318 318
319 private: 319 private:
320 CharacterSet set_; 320 CharacterSet set_;
321 bool is_negated_; 321 bool is_negated_;
322 }; 322 };
323 323
(...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after
510 RegExpEmpty* AsEmpty() override; 510 RegExpEmpty* AsEmpty() override;
511 bool IsEmpty() override; 511 bool IsEmpty() override;
512 int min_match() override { return 0; } 512 int min_match() override { return 0; }
513 int max_match() override { return 0; } 513 int max_match() override { return 0; }
514 }; 514 };
515 515
516 } // namespace internal 516 } // namespace internal
517 } // namespace v8 517 } // namespace v8
518 518
519 #endif // V8_REGEXP_REGEXP_AST_H_ 519 #endif // V8_REGEXP_REGEXP_AST_H_
OLD NEW

Powered by Google App Engine
This is Rietveld 408576698