Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(83)

Side by Side Diff: src/regexp/regexp-ast.h

Issue 2813893002: [regexp] Consider surrogate pairs when optimizing disjunctions (Closed)
Patch Set: DCHECK(!IsLeadSurrogate) Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/jsregexp.cc ('k') | src/regexp/regexp-parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments | Hide Comments ('s')
OLD | NEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_REGEXP_REGEXP_AST_H_ 5 #ifndef V8_REGEXP_REGEXP_AST_H_
6 #define V8_REGEXP_REGEXP_AST_H_ 6 #define V8_REGEXP_REGEXP_AST_H_
7 7
8 #include "src/objects.h" 8 #include "src/objects.h"
9 #include "src/utils.h" 9 #include "src/utils.h"
10 #include "src/zone/zone-containers.h" 10 #include "src/zone/zone-containers.h"
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after
284 int max_match() override { return 0; } 284 int max_match() override { return 0; }
285 AssertionType assertion_type() { return assertion_type_; } 285 AssertionType assertion_type() { return assertion_type_; }
286 286
287 private: 287 private:
288 AssertionType assertion_type_; 288 AssertionType assertion_type_;
289 }; 289 };
290 290
291 291
292 class RegExpCharacterClass final : public RegExpTree { 292 class RegExpCharacterClass final : public RegExpTree {
293 public: 293 public:
294 RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated) 294 // NEGATED: The character class is negated and should match everything but
295 : set_(ranges), is_negated_(is_negated) {} 295 // the specified ranges.
296 explicit RegExpCharacterClass(uc16 type) : set_(type), is_negated_(false) {} 296 // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split
297 // surrogate and should not be unicode-desugared (crbug.com/641091).
298 enum Flag {
299 NEGATED = 1 << 0,
300 CONTAINS_SPLIT_SURROGATE = 1 << 1,
301 };
302 typedef base::Flags<Flag> Flags;
303
304 explicit RegExpCharacterClass(ZoneList<CharacterRange>* ranges,
305 Flags flags = Flags())
306 : set_(ranges), flags_(flags) {}
307 explicit RegExpCharacterClass(uc16 type) : set_(type), flags_(0) {}
297 void* Accept(RegExpVisitor* visitor, void* data) override; 308 void* Accept(RegExpVisitor* visitor, void* data) override;
298 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 309 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
299 RegExpCharacterClass* AsCharacterClass() override; 310 RegExpCharacterClass* AsCharacterClass() override;
300 bool IsCharacterClass() override; 311 bool IsCharacterClass() override;
301 bool IsTextElement() override { return true; } 312 bool IsTextElement() override { return true; }
302 int min_match() override { return 1; } 313 int min_match() override { return 1; }
303 // The character class may match two code units for unicode regexps. 314 // The character class may match two code units for unicode regexps.
304 // TODO(yangguo): we should split this class for usage in TextElement, and 315 // TODO(yangguo): we should split this class for usage in TextElement, and
305 // make max_match() dependent on the character class content. 316 // make max_match() dependent on the character class content.
306 int max_match() override { return 2; } 317 int max_match() override { return 2; }
307 void AppendToText(RegExpText* text, Zone* zone) override; 318 void AppendToText(RegExpText* text, Zone* zone) override;
308 CharacterSet character_set() { return set_; } 319 CharacterSet character_set() { return set_; }
309 // TODO(lrn): Remove need for complex version if is_standard that 320 // TODO(lrn): Remove need for complex version if is_standard that
310 // recognizes a mangled standard set and just do { return set_.is_special(); } 321 // recognizes a mangled standard set and just do { return set_.is_special(); }
311 bool is_standard(Zone* zone); 322 bool is_standard(Zone* zone);
312 // Returns a value representing the standard character set if is_standard() 323 // Returns a value representing the standard character set if is_standard()
313 // returns true. 324 // returns true.
314 // Currently used values are: 325 // Currently used values are:
315 // s : unicode whitespace 326 // s : unicode whitespace
316 // S : unicode non-whitespace 327 // S : unicode non-whitespace
317 // w : ASCII word character (digit, letter, underscore) 328 // w : ASCII word character (digit, letter, underscore)
318 // W : non-word character (complement of w) 329 // W : non-word character (complement of w)
319 // d : ASCII digit 330 // d : ASCII digit
320 // D : non-digit character (complement of d) 331 // D : non-digit character (complement of d)
321 // . : non-newline 332 // . : non-newline
322 // * : All characters, for advancing unanchored regexp 333 // * : All characters, for advancing unanchored regexp
323 uc16 standard_type() { return set_.standard_set_type(); } 334 uc16 standard_type() { return set_.standard_set_type(); }
324 ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); } 335 ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
325 bool is_negated() { return is_negated_; } 336 bool is_negated() const { return (flags_ & NEGATED) != 0; }
337 bool contains_split_surrogate() const {
338 return (flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
339 }
326 340
327 private: 341 private:
328 CharacterSet set_; 342 CharacterSet set_;
329 bool is_negated_; 343 const Flags flags_;
330 }; 344 };
331 345
332 346
333 class RegExpAtom final : public RegExpTree { 347 class RegExpAtom final : public RegExpTree {
334 public: 348 public:
335 explicit RegExpAtom(Vector<const uc16> data) : data_(data) {} 349 explicit RegExpAtom(Vector<const uc16> data) : data_(data) {}
336 void* Accept(RegExpVisitor* visitor, void* data) override; 350 void* Accept(RegExpVisitor* visitor, void* data) override;
337 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 351 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
338 RegExpAtom* AsAtom() override; 352 RegExpAtom* AsAtom() override;
339 bool IsAtom() override; 353 bool IsAtom() override;
(...skipping 208 matching lines...) Expand 10 before | Expand all | Expand 10 after
548 RegExpEmpty* AsEmpty() override; 562 RegExpEmpty* AsEmpty() override;
549 bool IsEmpty() override; 563 bool IsEmpty() override;
550 int min_match() override { return 0; } 564 int min_match() override { return 0; }
551 int max_match() override { return 0; } 565 int max_match() override { return 0; }
552 }; 566 };
553 567
554 } // namespace internal 568 } // namespace internal
555 } // namespace v8 569 } // namespace v8
556 570
557 #endif // V8_REGEXP_REGEXP_AST_H_ 571 #endif // V8_REGEXP_REGEXP_AST_H_
OLD | NEW
« no previous file with comments | « src/regexp/jsregexp.cc ('k') | src/regexp/regexp-parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698