OLD | NEW |
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
112 // Generic RegExp methods. Dispatches to implementation specific methods. | 112 // Generic RegExp methods. Dispatches to implementation specific methods. |
113 | 113 |
114 | 114 |
115 class OffsetsVector { | 115 class OffsetsVector { |
116 public: | 116 public: |
117 inline OffsetsVector(int num_registers) | 117 inline OffsetsVector(int num_registers) |
118 : offsets_vector_length_(num_registers) { | 118 : offsets_vector_length_(num_registers) { |
119 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | 119 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { |
120 vector_ = NewArray<int>(offsets_vector_length_); | 120 vector_ = NewArray<int>(offsets_vector_length_); |
121 } else { | 121 } else { |
122 vector_ = static_offsets_vector_; | 122 int* & static_offsets_vector = |
| 123 v8_context()->reg_exp_stack_data_.static_offsets_vector_; |
| 124 if (!static_offsets_vector) |
| 125 static_offsets_vector = new int[kStaticOffsetsVectorSize]; |
| 126 vector_ = static_offsets_vector; |
123 } | 127 } |
124 } | 128 } |
125 inline ~OffsetsVector() { | 129 inline ~OffsetsVector() { |
126 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | 130 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { |
127 DeleteArray(vector_); | 131 DeleteArray(vector_); |
128 vector_ = NULL; | 132 vector_ = NULL; |
129 } | 133 } |
130 } | 134 } |
131 inline int* vector() { return vector_; } | 135 inline int* vector() { return vector_; } |
132 inline int length() { return offsets_vector_length_; } | 136 inline int length() { return offsets_vector_length_; } |
133 | 137 |
134 private: | 138 private: |
135 int* vector_; | 139 int* vector_; |
136 int offsets_vector_length_; | 140 int offsets_vector_length_; |
137 static const int kStaticOffsetsVectorSize = 50; | 141 static const int kStaticOffsetsVectorSize = 50; |
138 static int static_offsets_vector_[kStaticOffsetsVectorSize]; | |
139 }; | 142 }; |
140 | 143 |
141 | |
142 int OffsetsVector::static_offsets_vector_[ | |
143 OffsetsVector::kStaticOffsetsVectorSize]; | |
144 | |
145 | |
146 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, | 144 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, |
147 Handle<String> pattern, | 145 Handle<String> pattern, |
148 Handle<String> flag_str) { | 146 Handle<String> flag_str) { |
149 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); | 147 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); |
150 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); | 148 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); |
151 bool in_cache = !cached.is_null(); | 149 bool in_cache = !cached.is_null(); |
152 LOG(RegExpCompileEvent(re, in_cache)); | 150 LOG(RegExpCompileEvent(re, in_cache)); |
153 | 151 |
154 Handle<Object> result; | 152 Handle<Object> result; |
155 if (in_cache) { | 153 if (in_cache) { |
(...skipping 1084 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1240 break; | 1238 break; |
1241 case Guard::GEQ: | 1239 case Guard::GEQ: |
1242 ASSERT(!trace->mentions_reg(guard->reg())); | 1240 ASSERT(!trace->mentions_reg(guard->reg())); |
1243 macro_assembler->IfRegisterLT(guard->reg(), | 1241 macro_assembler->IfRegisterLT(guard->reg(), |
1244 guard->value(), | 1242 guard->value(), |
1245 trace->backtrack()); | 1243 trace->backtrack()); |
1246 break; | 1244 break; |
1247 } | 1245 } |
1248 } | 1246 } |
1249 | 1247 |
1250 | |
1251 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; | |
1252 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; | |
1253 | |
1254 | |
1255 // Returns the number of characters in the equivalence class, omitting those | 1248 // Returns the number of characters in the equivalence class, omitting those |
1256 // that cannot occur in the source string because it is ASCII. | 1249 // that cannot occur in the source string because it is ASCII. |
1257 static int GetCaseIndependentLetters(uc16 character, | 1250 static int GetCaseIndependentLetters(uc16 character, |
1258 bool ascii_subject, | 1251 bool ascii_subject, |
1259 unibrow::uchar* letters) { | 1252 unibrow::uchar* letters) { |
1260 int length = uncanonicalize.get(character, '\0', letters); | 1253 int length = v8_context()->reg_exp_stack_data_.uncanonicalize_.get( |
| 1254 character, '\0', letters); |
1261 // Unibrow returns 0 or 1 for characters where case independence is | 1255 // Unibrow returns 0 or 1 for characters where case independence is |
1262 // trivial. | 1256 // trivial. |
1263 if (length == 0) { | 1257 if (length == 0) { |
1264 letters[0] = character; | 1258 letters[0] = character; |
1265 length = 1; | 1259 length = 1; |
1266 } | 1260 } |
1267 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { | 1261 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { |
1268 return length; | 1262 return length; |
1269 } | 1263 } |
1270 // The standard requires that non-ASCII characters cannot have ASCII | 1264 // The standard requires that non-ASCII characters cannot have ASCII |
(...skipping 2644 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3915 } | 3909 } |
3916 | 3910 |
3917 | 3911 |
3918 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, | 3912 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, |
3919 int bottom, | 3913 int bottom, |
3920 int top); | 3914 int top); |
3921 | 3915 |
3922 | 3916 |
3923 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | 3917 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
3924 bool is_ascii) { | 3918 bool is_ascii) { |
| 3919 unibrow::Mapping<unibrow::Ecma262UnCanonicalize>& uncanonicalize = |
| 3920 v8_context()->reg_exp_stack_data_.uncanonicalize_; |
3925 uc16 bottom = from(); | 3921 uc16 bottom = from(); |
3926 uc16 top = to(); | 3922 uc16 top = to(); |
3927 if (is_ascii) { | 3923 if (is_ascii) { |
3928 if (bottom > String::kMaxAsciiCharCode) return; | 3924 if (bottom > String::kMaxAsciiCharCode) return; |
3929 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; | 3925 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; |
3930 } | 3926 } |
3931 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 3927 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
3932 if (top == bottom) { | 3928 if (top == bottom) { |
3933 // If this is a singleton we just expand the one character. | 3929 // If this is a singleton we just expand the one character. |
3934 int length = uncanonicalize.get(bottom, '\0', chars); | 3930 int length = uncanonicalize.get(bottom, '\0', chars); |
(...skipping 16 matching lines...) Expand all Loading... |
3951 // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter | 3947 // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter |
3952 // uncanonicalizes to ['a' + k, 'A' + k]. | 3948 // uncanonicalizes to ['a' + k, 'A' + k]. |
3953 // Once we've found the start point we look up its uncanonicalization | 3949 // Once we've found the start point we look up its uncanonicalization |
3954 // and produce a range for each element. For instance for [c-f] | 3950 // and produce a range for each element. For instance for [c-f] |
3955 // we look up ['a', 'A'] and produce [c-f] and [C-F]. We then only | 3951 // we look up ['a', 'A'] and produce [c-f] and [C-F]. We then only |
3956 // add a range if it is not already contained in the input, so [c-f] | 3952 // add a range if it is not already contained in the input, so [c-f] |
3957 // will be skipped but [C-F] will be added. If this range is not | 3953 // will be skipped but [C-F] will be added. If this range is not |
3958 // completely contained in a block we do this for all the blocks | 3954 // completely contained in a block we do this for all the blocks |
3959 // covered by the range. | 3955 // covered by the range. |
3960 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 3956 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 3957 unibrow::Mapping<unibrow::CanonicalizationRange>& canonrange = |
| 3958 v8_context()->reg_exp_stack_data_.canonrange_; |
3961 // First, look up the block that contains the 'bottom' character. | 3959 // First, look up the block that contains the 'bottom' character. |
3962 int length = canonrange.get(bottom, '\0', range); | 3960 int length = canonrange.get(bottom, '\0', range); |
3963 if (length == 0) { | 3961 if (length == 0) { |
3964 range[0] = bottom; | 3962 range[0] = bottom; |
3965 } else { | 3963 } else { |
3966 ASSERT_EQ(1, length); | 3964 ASSERT_EQ(1, length); |
3967 } | 3965 } |
3968 int pos = bottom; | 3966 int pos = bottom; |
3969 // The start of the current block. Note that except for the first | 3967 // The start of the current block. Note that except for the first |
3970 // iteration 'start' is always equal to 'pos'. | 3968 // iteration 'start' is always equal to 'pos'. |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4047 // Split up very large ranges. This helps remove ranges where there are no | 4045 // Split up very large ranges. This helps remove ranges where there are no |
4048 // case mappings. | 4046 // case mappings. |
4049 for (int i = 0; i < boundary_count; i++) { | 4047 for (int i = 0; i < boundary_count; i++) { |
4050 if (bottom < boundaries[i] && top >= boundaries[i]) { | 4048 if (bottom < boundaries[i] && top >= boundaries[i]) { |
4051 AddUncanonicals(ranges, bottom, boundaries[i] - 1); | 4049 AddUncanonicals(ranges, bottom, boundaries[i] - 1); |
4052 AddUncanonicals(ranges, boundaries[i], top); | 4050 AddUncanonicals(ranges, boundaries[i], top); |
4053 return; | 4051 return; |
4054 } | 4052 } |
4055 } | 4053 } |
4056 | 4054 |
| 4055 unibrow::Mapping<unibrow::Ecma262UnCanonicalize>& uncanonicalize = |
| 4056 v8_context()->reg_exp_stack_data_.uncanonicalize_; |
| 4057 |
4057 // If we are completely in a zone with no case mappings then we are done. | 4058 // If we are completely in a zone with no case mappings then we are done. |
4058 // We start at 2 so as not to except the ASCII range from mappings. | 4059 // We start at 2 so as not to except the ASCII range from mappings. |
4059 for (int i = kFirstRealCaselessZoneIndex; i < boundary_count; i += 2) { | 4060 for (int i = kFirstRealCaselessZoneIndex; i < boundary_count; i += 2) { |
4060 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { | 4061 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { |
4061 #ifdef DEBUG | 4062 #ifdef DEBUG |
4062 for (int j = bottom; j <= top; j++) { | 4063 for (int j = bottom; j <= top; j++) { |
4063 unsigned current_char = j; | 4064 unsigned current_char = j; |
4064 int length = uncanonicalize.get(current_char, '\0', chars); | 4065 int length = uncanonicalize.get(current_char, '\0', chars); |
4065 for (int k = 0; k < length; k++) { | 4066 for (int k = 0; k < length; k++) { |
4066 ASSERT(chars[k] == current_char); | 4067 ASSERT(chars[k] == current_char); |
(...skipping 533 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4600 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4601 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
4601 #endif | 4602 #endif |
4602 | 4603 |
4603 return compiler.Assemble(&macro_assembler, | 4604 return compiler.Assemble(&macro_assembler, |
4604 node, | 4605 node, |
4605 data->capture_count, | 4606 data->capture_count, |
4606 pattern); | 4607 pattern); |
4607 } | 4608 } |
4608 | 4609 |
4609 }} // namespace v8::internal | 4610 }} // namespace v8::internal |
OLD | NEW |