Index: test/cctest/test-regexp.cc |
diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc |
index c72c4d19fc40a52cce879a0c4e6f2b6611917877..ad68a4568d015d307e922ecc01a1dd99cd7ca889 100644 |
--- a/test/cctest/test-regexp.cc |
+++ b/test/cctest/test-regexp.cc |
@@ -1650,6 +1650,163 @@ TEST(CanonicalizeCharacterSets) { |
ASSERT_EQ(30, list->at(0).to()); |
} |
+// Checks whether a character is in the set represented by a list of ranges. |
+static bool CharacterInSet(ZoneList<CharacterRange>* set, uc16 value) { |
+ for (int i = 0; i < set->length(); i++) { |
+ CharacterRange range = set->at(i); |
+ if (range.from() <= value && value <= range.to()) { |
+ return true; |
+ } |
+ } |
+ return false; |
+} |
+ |
+TEST(CharacterRangeMerge) { |
+ ZoneScope zone_scope(DELETE_ON_EXIT); |
+ ZoneList<CharacterRange> l1(4); |
+ ZoneList<CharacterRange> l2(4); |
+ // Create all combinations of intersections of ranges, both singletons and |
+ // longer. |
+ |
+ int offset = 0; |
+ |
+ // The five kinds of singleton intersections: |
+ // X |
+ // Y - outside before |
+ // Y - outside touching start |
+ // Y - overlap |
+ // Y - outside touching end |
+ // Y - outside after |
+ |
+ for (int i = 0; i < 5; i++) { |
+ l1.Add(CharacterRange::Singleton(offset + 2)); |
+ l2.Add(CharacterRange::Singleton(offset + i)); |
+ offset += 6; |
+ } |
+ |
+ // The seven kinds of singleton/non-singleton intersections: |
+ // XXX |
+ // Y - outside before |
+ // Y - outside touching start |
+ // Y - inside touching start |
+ // Y - entirely inside |
+ // Y - inside touching end |
+ // Y - outside touching end |
+ // Y - disjoint after |
+ |
+ for (int i = 0; i < 7; i++) { |
+ l1.Add(CharacterRange::Range(offset + 2, offset + 4)); |
+ l2.Add(CharacterRange::Singleton(offset + i)); |
+ offset += 8; |
+ } |
+ |
+ // The eleven kinds of non-singleton intersections: |
+ // |
+ // XXXXXXXX |
+ // YYYY - outside before. |
+ // YYYY - outside touching start. |
+ // YYYY - overlapping start |
+ // YYYY - inside touching start |
+ // YYYY - entirely inside |
+ // YYYY - inside touching end |
+ // YYYY - overlapping end |
+ // YYYY - outside touching end |
+ // YYYY - outside after |
+ // YYYYYYYY - identical |
+ // YYYYYYYYYYYY - containing entirely. |
+ |
+ for (int i = 0; i < 9; i++) { |
+ l1.Add(CharacterRange::Range(offset + 6, offset + 15)); // Length 8. |
+ l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3)); |
+ offset += 22; |
+ } |
+ l1.Add(CharacterRange::Range(offset + 6, offset + 15)); |
+ l2.Add(CharacterRange::Range(offset + 6, offset + 15)); |
+ offset += 22; |
+ l1.Add(CharacterRange::Range(offset + 6, offset + 15)); |
+ l2.Add(CharacterRange::Range(offset + 4, offset + 17)); |
+ offset += 22; |
+ |
+ // Different kinds of multi-range overlap: |
+ // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX |
+ // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y |
+ |
+ l1.Add(CharacterRange::Range(offset, offset + 21)); |
+ l1.Add(CharacterRange::Range(offset + 31, offset + 52)); |
+ for (int i = 0; i < 6; i++) { |
+ l2.Add(CharacterRange::Range(offset + 2, offset + 5)); |
+ l2.Add(CharacterRange::Singleton(offset + 8)); |
+ offset += 9; |
+ } |
+ |
+ ASSERT(CharacterRange::IsCanonical(&l1)); |
+ ASSERT(CharacterRange::IsCanonical(&l2)); |
+ |
+ ZoneList<CharacterRange> first_only(4); |
+ ZoneList<CharacterRange> second_only(4); |
+ ZoneList<CharacterRange> both(4); |
+ |
+ // Merge one direction. |
+ CharacterRange::Merge(&l1, &l2, &first_only, &second_only, &both); |
+ |
+ CHECK(CharacterRange::IsCanonical(&first_only)); |
+ CHECK(CharacterRange::IsCanonical(&second_only)); |
+ CHECK(CharacterRange::IsCanonical(&both)); |
+ |
+ for (uc16 i = 0; i < offset; i++) { |
+ bool in_first = CharacterInSet(&l1, i); |
+ bool in_second = CharacterInSet(&l2, i); |
+ CHECK((in_first && !in_second) == CharacterInSet(&first_only, i)); |
+ CHECK((!in_first && in_second) == CharacterInSet(&second_only, i)); |
+ CHECK((in_first && in_second) == CharacterInSet(&both, i)); |
+ } |
+ |
+ first_only.Clear(); |
+ second_only.Clear(); |
+ both.Clear(); |
+ |
+ // Merge other direction. |
+ CharacterRange::Merge(&l2, &l1, &second_only, &first_only, &both); |
+ |
+ CHECK(CharacterRange::IsCanonical(&first_only)); |
+ CHECK(CharacterRange::IsCanonical(&second_only)); |
+ CHECK(CharacterRange::IsCanonical(&both)); |
+ |
+ for (uc16 i = 0; i < offset; i++) { |
+ bool in_first = CharacterInSet(&l1, i); |
+ bool in_second = CharacterInSet(&l2, i); |
+ CHECK((in_first && !in_second) == CharacterInSet(&first_only, i)); |
+ CHECK((!in_first && in_second) == CharacterInSet(&second_only, i)); |
+ CHECK((in_first && in_second) == CharacterInSet(&both, i)); |
+ } |
+ |
+ first_only.Clear(); |
+ second_only.Clear(); |
+ both.Clear(); |
+ |
+ // Merge but don't record all combinations. |
+ CharacterRange::Merge(&l1, &l2, NULL, NULL, &both); |
+ |
+ CHECK(CharacterRange::IsCanonical(&both)); |
+ |
+ for (uc16 i = 0; i < offset; i++) { |
+ bool in_first = CharacterInSet(&l1, i); |
+ bool in_second = CharacterInSet(&l2, i); |
+ CHECK((in_first && in_second) == CharacterInSet(&both, i)); |
+ } |
+ |
+ // Merge into same set. |
+ ZoneList<CharacterRange> all(4); |
+ CharacterRange::Merge(&l1, &l2, &all, &all, &all); |
+ |
+ CHECK(CharacterRange::IsCanonical(&all)); |
+ |
+ for (uc16 i = 0; i < offset; i++) { |
+ bool in_first = CharacterInSet(&l1, i); |
+ bool in_second = CharacterInSet(&l2, i); |
+ CHECK((in_first || in_second) == CharacterInSet(&all, i)); |
+ } |
+} |
TEST(Graph) { |