Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(967)

Unified Diff: src/jsregexp.h

Issue 8732: Character classes (Closed)
Patch Set: Created 12 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.h
diff --git a/src/jsregexp.h b/src/jsregexp.h
index 7138316e21d184b421f3c608b23498e972370cbd..2d4351612185ca4806e728b6af03e7e0b18bddeb 100644
--- a/src/jsregexp.h
+++ b/src/jsregexp.h
@@ -126,6 +126,117 @@ class RegExpImpl {
template <typename Char> class RegExpNode;
+class CharacterClassAllocator;
+
+
+class CharacterClass {
+ public:
+
+ enum Type { EMPTY = 0, FIELD = 1, RANGES = 2, UNION = 3 };
+
+ // A closed range from and including 'from', to and including 'to'.
+ class Range {
+ public:
+ Range() : from_(0), to_(0) { }
+ Range(uc16 from, uc16 to) : from_(from), to_(to) { ASSERT(from <= to); }
+ uc16 from() { return from_; }
+ uc16 to() { return to_; }
+ private:
+ uc16 from_;
+ uc16 to_;
+ };
+
+ CharacterClass() : type_(EMPTY) { }
+
+ explicit CharacterClass(Type type) : type_(type) { }
+
+ bool Contains(uc16 c);
+
+ // Returns a character class with a single bit set
+ static inline CharacterClass SingletonField(uc16 chr);
+
+ // Returns a bitfield character class with a closed range set. The
+ // range must fit within one field, that is, fit between two adjacent
+ // kFieldMax-aligned boundaries.
+ static inline CharacterClass RangeField(Range range);
+
+ static inline CharacterClass Union(CharacterClass* left,
+ CharacterClass* right);
+
+ // Initializes an empty charclass as a bitfield containing the
+ // specified ranges.
+ void InitializeFieldFrom(Vector<Range> ranges);
+
+ // Initializes this character class to be the specified ranges.
+ // This class must be empty.
+ void InitializeRangesFrom(Vector<Range> ranges,
+ CharacterClassAllocator* alloc);
+
+ // Creates a new character class containing the specified ranges
+ // and allocating any sub-classes using the specified allocator.
+ static CharacterClass Ranges(Vector<Range> boundaries,
+ CharacterClassAllocator* alloc);
+
+ // Returns one of the built-in character classes such as '\w' or
+ // '\S'.
+ static CharacterClass* GetCharacterClass(uc16 tag);
+
+ inline void write_nibble(int index, byte value);
+ inline byte read_nibble(int index);
+
+ static inline unsigned segment_of(uc16 value);
+ static inline uc16 segment_start(unsigned segment);
+
+ private:
+ static const int kCharSize = 16;
+ static const int kFieldSegmentIndexWidth = 10;
+ static const int kFieldSegmentWidth = kCharSize - kFieldSegmentIndexWidth;
Lasse Reichstein 2008/10/30 10:28:42 Isn't this the field width, not the segment width?
+ static const int kFieldMax = (1 << kFieldSegmentWidth);
+ static const int kSegmentMask = (1 << kFieldSegmentWidth) - 1;
+ static const int kNibbleCount = kFieldMax / 4;
+ STATIC_ASSERT(kFieldMax == 8 * sizeof(uint64_t));
+
+ Type type() { return type_; }
+
+ static inline uint64_t long_bit(int index) {
+ return static_cast<uint64_t>(1) << index;
+ }
+
+ Type type_: 2;
+ unsigned segment_ : 10;
+ unsigned count_ : 4;
+ union {
+ // These have the same type to make it easier to change one without
+ // touching the other.
+ uint64_t u_field;
+ uint64_t u_ranges;
+ struct {
+ CharacterClass* left;
+ CharacterClass* right;
+ } u_union;
Lasse Reichstein 2008/10/30 10:28:42 Slightly confusing name to have for a struct (espe
+ } data_;
+};
+
+
+STATIC_ASSERT(sizeof(CharacterClass) == 3 * kIntSize);
+
+
+class CharacterClassAllocator {
+ public:
+ virtual CharacterClass* Allocate() = 0;
+ virtual ~CharacterClassAllocator() { }
+};
+
+
+template <int kCount>
+class StaticCharacterClassAllocator: public CharacterClassAllocator {
+ public:
+ StaticCharacterClassAllocator() : used_(0) { }
+ virtual CharacterClass* Allocate();
+ private:
+ int used_;
+ CharacterClass preallocated_[kCount];
+};
class RegExpEngine: public AllStatic {
« no previous file with comments | « no previous file | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698