OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_REGEXP_REGEXP_AST_H_ | 5 #ifndef V8_REGEXP_REGEXP_AST_H_ |
6 #define V8_REGEXP_REGEXP_AST_H_ | 6 #define V8_REGEXP_REGEXP_AST_H_ |
7 | 7 |
8 #include "src/objects.h" | 8 #include "src/objects.h" |
9 #include "src/utils.h" | 9 #include "src/utils.h" |
10 #include "src/zone/zone-containers.h" | 10 #include "src/zone/zone-containers.h" |
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
284 int max_match() override { return 0; } | 284 int max_match() override { return 0; } |
285 AssertionType assertion_type() { return assertion_type_; } | 285 AssertionType assertion_type() { return assertion_type_; } |
286 | 286 |
287 private: | 287 private: |
288 AssertionType assertion_type_; | 288 AssertionType assertion_type_; |
289 }; | 289 }; |
290 | 290 |
291 | 291 |
// RegExpCharacterClass: a RegExpTree subclass holding a CharacterSet (set_).
// In this side-by-side view the left column is the OLD revision and the right
// column is the NEW revision. NEW replaces the OLD `bool is_negated_` member
// with a `const Flags flags_` bit set (NEGATED, CONTAINS_SPLIT_SURROGATE) and
// keeps is_negated() as an accessor over that bit set.
292 class RegExpCharacterClass final : public RegExpTree { | 292 class RegExpCharacterClass final : public RegExpTree { |
293 public: | 293 public: |
294 RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated) | 294 // NEGATED: The character class is negated and should match everything but |
295 : set_(ranges), is_negated_(is_negated) {} | 295 // the specified ranges. |
296 explicit RegExpCharacterClass(uc16 type) : set_(type), is_negated_(false) {} | 296 // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split |
| 297 // surrogate and should not be unicode-desugared (crbug.com/641091). |
| 298 enum Flag { |
| 299 NEGATED = 1 << 0, |
| 300 CONTAINS_SPLIT_SURROGATE = 1 << 1, |
| 301 }; |
| 302 typedef base::Flags<Flag> Flags; |
| 303 |
// NEW constructors: the ranges overload takes an optional Flags argument that
// defaults to an empty flag set; the uc16 overload builds set_ from a type
// code and always starts with no flags set.
// NOTE(review): the OLD ranges overload took a mandatory `bool is_negated`;
// call sites presumably had to be migrated to pass Flags -- confirm.
| 304 explicit RegExpCharacterClass(ZoneList<CharacterRange>* ranges, |
| 305 Flags flags = Flags()) |
| 306 : set_(ranges), flags_(flags) {} |
| 307 explicit RegExpCharacterClass(uc16 type) : set_(type), flags_(0) {} |
297 void* Accept(RegExpVisitor* visitor, void* data) override; | 308 void* Accept(RegExpVisitor* visitor, void* data) override; |
298 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; | 309 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
299 RegExpCharacterClass* AsCharacterClass() override; | 310 RegExpCharacterClass* AsCharacterClass() override; |
300 bool IsCharacterClass() override; | 311 bool IsCharacterClass() override; |
301 bool IsTextElement() override { return true; } | 312 bool IsTextElement() override { return true; } |
302 int min_match() override { return 1; } | 313 int min_match() override { return 1; } |
303 // The character class may match two code units for unicode regexps. | 314 // The character class may match two code units for unicode regexps. |
304 // TODO(yangguo): we should split this class for usage in TextElement, and | 315 // TODO(yangguo): we should split this class for usage in TextElement, and |
305 // make max_match() dependent on the character class content. | 316 // make max_match() dependent on the character class content. |
306 int max_match() override { return 2; } | 317 int max_match() override { return 2; } |
307 void AppendToText(RegExpText* text, Zone* zone) override; | 318 void AppendToText(RegExpText* text, Zone* zone) override; |
308 CharacterSet character_set() { return set_; } | 319 CharacterSet character_set() { return set_; } |
309 // TODO(lrn): Remove need for complex version if is_standard that | 320 // TODO(lrn): Remove need for complex version if is_standard that |
310 // recognizes a mangled standard set and just do { return set_.is_special(); } | 321 // recognizes a mangled standard set and just do { return set_.is_special(); } |
311 bool is_standard(Zone* zone); | 322 bool is_standard(Zone* zone); |
312 // Returns a value representing the standard character set if is_standard() | 323 // Returns a value representing the standard character set if is_standard() |
313 // returns true. | 324 // returns true. |
314 // Currently used values are: | 325 // Currently used values are: |
315 // s : unicode whitespace | 326 // s : unicode whitespace |
316 // S : unicode non-whitespace | 327 // S : unicode non-whitespace |
317 // w : ASCII word character (digit, letter, underscore) | 328 // w : ASCII word character (digit, letter, underscore) |
318 // W : non-ASCII word character | 329 // W : non-ASCII word character |
319 // d : ASCII digit | 330 // d : ASCII digit |
320 // D : non-ASCII digit | 331 // D : non-ASCII digit |
321 // . : non-newline | 332 // . : non-newline |
322 // * : All characters, for advancing unanchored regexp | 333 // * : All characters, for advancing unanchored regexp |
// NOTE(review): by analogy with the s/S pair above, 'W' and 'D' presumably
// mean "not an ASCII word character" and "not an ASCII digit"; the
// "non-ASCII ..." wording reads like a misplaced negation -- confirm.
323 uc16 standard_type() { return set_.standard_set_type(); } | 334 uc16 standard_type() { return set_.standard_set_type(); } |
324 ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); } | 335 ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); } |
// Flag accessors. In NEW each one tests a single bit of flags_ and is
// const-qualified; OLD is_negated() returned the bool member directly.
325 bool is_negated() { return is_negated_; } | 336 bool is_negated() const { return (flags_ & NEGATED) != 0; } |
| 337 bool contains_split_surrogate() const { |
| 338 return (flags_ & CONTAINS_SPLIT_SURROGATE) != 0; |
| 339 } |
326 | 340 |
327 private: | 341 private: |
328 CharacterSet set_; | 342 CharacterSet set_; |
329 bool is_negated_; | 343 const Flags flags_; |
330 }; | 344 }; |
331 | 345 |
332 | 346 |
333 class RegExpAtom final : public RegExpTree { | 347 class RegExpAtom final : public RegExpTree { |
334 public: | 348 public: |
335 explicit RegExpAtom(Vector<const uc16> data) : data_(data) {} | 349 explicit RegExpAtom(Vector<const uc16> data) : data_(data) {} |
336 void* Accept(RegExpVisitor* visitor, void* data) override; | 350 void* Accept(RegExpVisitor* visitor, void* data) override; |
337 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; | 351 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
338 RegExpAtom* AsAtom() override; | 352 RegExpAtom* AsAtom() override; |
339 bool IsAtom() override; | 353 bool IsAtom() override; |
(...skipping 208 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
548 RegExpEmpty* AsEmpty() override; | 562 RegExpEmpty* AsEmpty() override; |
549 bool IsEmpty() override; | 563 bool IsEmpty() override; |
550 int min_match() override { return 0; } | 564 int min_match() override { return 0; } |
551 int max_match() override { return 0; } | 565 int max_match() override { return 0; } |
552 }; | 566 }; |
553 | 567 |
554 } // namespace internal | 568 } // namespace internal |
555 } // namespace v8 | 569 } // namespace v8 |
556 | 570 |
557 #endif // V8_REGEXP_REGEXP_AST_H_ | 571 #endif // V8_REGEXP_REGEXP_AST_H_ |
OLD | NEW |