Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: test/cctest/test-regexp.cc

Issue 11349: Character range uncanonicalization. (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/unicode.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 794 matching lines...) Expand 10 before | Expand all | Expand 10 after
805 805
806 806
807 TEST(SimplePropagation) { 807 TEST(SimplePropagation) {
808 v8::HandleScope scope; 808 v8::HandleScope scope;
809 ZoneScope zone_scope(DELETE_ON_EXIT); 809 ZoneScope zone_scope(DELETE_ON_EXIT);
810 RegExpNode* node = Compile("(a|^b|c)"); 810 RegExpNode* node = Compile("(a|^b|c)");
811 CHECK(node->info()->determine_start); 811 CHECK(node->info()->determine_start);
812 } 812 }
813 813
814 814
815 static uc32 CanonRange(uc32 c) {
816 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
817 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
818 if (count == 0) {
819 return c;
820 } else {
821 CHECK_EQ(1, count);
822 return canon[0];
823 }
824 }
825
826
827 TEST(RangeCanonicalization) {
828 ASSERT((CanonRange(0) & CharacterRange::kStartMarker) != 0);
829 // Check that we arrive at the same result when using the basic
830 // range canonicalization primitives as when using immediate
831 // canonicalization.
832 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
833 for (int i = 0; i < CharacterRange::kRangeCanonicalizeMax; i++) {
834 int range = CanonRange(i);
835 int indirect_length = 0;
836 unibrow::uchar indirect[unibrow::Ecma262UnCanonicalize::kMaxWidth];
837 if ((range & CharacterRange::kStartMarker) == 0) {
838 indirect_length = un_canonicalize.get(i - range, '\0', indirect);
839 for (int i = 0; i < indirect_length; i++)
840 indirect[i] += range;
841 } else {
842 indirect_length = un_canonicalize.get(i, '\0', indirect);
843 }
844 unibrow::uchar direct[unibrow::Ecma262UnCanonicalize::kMaxWidth];
845 int direct_length = un_canonicalize.get(i, '\0', direct);
846 CHECK_EQ(direct_length, indirect_length);
847 }
848 // Check that we arrive at the same results when skipping over
849 // canonicalization ranges.
850 int next_block = 0;
851 while (next_block < CharacterRange::kRangeCanonicalizeMax) {
852 uc32 start = CanonRange(next_block);
853 CHECK((start & CharacterRange::kStartMarker) != 0);
854 unsigned dist = start & CharacterRange::kPayloadMask;
855 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
856 int first_length = un_canonicalize.get(next_block, '\0', first);
857 for (unsigned i = 1; i < dist; i++) {
858 CHECK_EQ(i, CanonRange(i));
859 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
860 int succ_length = un_canonicalize.get(next_block + i, '\0', succ);
861 CHECK_EQ(first_length, succ_length);
862 for (int j = 0; j < succ_length; j++) {
863 int calc = first[j] + i;
864 int found = succ[j];
865 CHECK_EQ(calc, found);
866 }
867 }
868 next_block = next_block + dist;
869 }
870 }
871
872
873 static void TestRangeCaseIndependence(CharacterRange input,
874 Vector<CharacterRange> expected) {
875 ZoneScope zone_scope(DELETE_ON_EXIT);
876 int count = expected.length();
877 ZoneList<CharacterRange>* list = new ZoneList<CharacterRange>(count);
878 input.AddCaseEquivalents(list);
879 CHECK_EQ(count, list->length());
880 for (int i = 0; i < list->length(); i++) {
881 CHECK_EQ(expected[i].from(), list->at(i).from());
882 CHECK_EQ(expected[i].to(), list->at(i).to());
883 }
884 }
885
886
887 static void TestSimpleRangeCaseIndependence(CharacterRange input,
888 CharacterRange expected) {
889 EmbeddedVector<CharacterRange, 1> vector;
890 vector[0] = expected;
891 TestRangeCaseIndependence(input, vector);
892 }
893
894
895 TEST(CharacterRangeCaseIndependence) {
896 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),
897 CharacterRange::Singleton('A'));
898 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),
899 CharacterRange::Singleton('Z'));
900 TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'),
901 CharacterRange('A', 'Z'));
902 TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),
903 CharacterRange('C', 'F'));
904 TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),
905 CharacterRange('A', 'B'));
906 TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),
907 CharacterRange('Y', 'Z'));
908 TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),
909 CharacterRange('A', 'Z'));
910 TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'),
911 CharacterRange('a', 'z'));
912 TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),
913 CharacterRange('c', 'f'));
914 TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),
915 CharacterRange('a', 'z'));
916 // Here we need to add [l-z] to complete the case independence of
917 // [A-Za-z] but we expect [a-z] to be added since we always add a
918 // whole block at a time.
919 TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'),
920 CharacterRange('a', 'z'));
921 }
922
923
815 TEST(Graph) { 924 TEST(Graph) {
816 V8::Initialize(NULL); 925 V8::Initialize(NULL);
817 Execute(".*o(?=o)", "", true); 926 Execute("(a|^b|c)", "", false);
818 } 927 }
OLDNEW
« no previous file with comments | « src/unicode.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698