| OLD | NEW |
| 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 112 // Generic RegExp methods. Dispatches to implementation specific methods. | 112 // Generic RegExp methods. Dispatches to implementation specific methods. |
| 113 | 113 |
| 114 | 114 |
| 115 class OffsetsVector { | 115 class OffsetsVector { |
| 116 public: | 116 public: |
| 117 inline OffsetsVector(int num_registers) | 117 inline OffsetsVector(int num_registers) |
| 118 : offsets_vector_length_(num_registers) { | 118 : offsets_vector_length_(num_registers) { |
| 119 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | 119 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { |
| 120 vector_ = NewArray<int>(offsets_vector_length_); | 120 vector_ = NewArray<int>(offsets_vector_length_); |
| 121 } else { | 121 } else { |
| 122 vector_ = static_offsets_vector_; | 122 int* & static_offsets_vector = |
| 123 v8_context()->reg_exp_stack_data_.static_offsets_vector_; |
| 124 if (!static_offsets_vector) |
| 125 static_offsets_vector = new int[kStaticOffsetsVectorSize]; |
| 126 vector_ = static_offsets_vector; |
| 123 } | 127 } |
| 124 } | 128 } |
| 125 inline ~OffsetsVector() { | 129 inline ~OffsetsVector() { |
| 126 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | 130 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { |
| 127 DeleteArray(vector_); | 131 DeleteArray(vector_); |
| 128 vector_ = NULL; | 132 vector_ = NULL; |
| 129 } | 133 } |
| 130 } | 134 } |
| 131 inline int* vector() { return vector_; } | 135 inline int* vector() { return vector_; } |
| 132 inline int length() { return offsets_vector_length_; } | 136 inline int length() { return offsets_vector_length_; } |
| 133 | 137 |
| 134 private: | 138 private: |
| 135 int* vector_; | 139 int* vector_; |
| 136 int offsets_vector_length_; | 140 int offsets_vector_length_; |
| 137 static const int kStaticOffsetsVectorSize = 50; | 141 static const int kStaticOffsetsVectorSize = 50; |
| 138 static int static_offsets_vector_[kStaticOffsetsVectorSize]; | |
| 139 }; | 142 }; |
| 140 | 143 |
| 141 | |
| 142 int OffsetsVector::static_offsets_vector_[ | |
| 143 OffsetsVector::kStaticOffsetsVectorSize]; | |
| 144 | |
| 145 | |
| 146 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, | 144 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, |
| 147 Handle<String> pattern, | 145 Handle<String> pattern, |
| 148 Handle<String> flag_str) { | 146 Handle<String> flag_str) { |
| 149 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); | 147 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); |
| 150 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); | 148 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); |
| 151 bool in_cache = !cached.is_null(); | 149 bool in_cache = !cached.is_null(); |
| 152 LOG(RegExpCompileEvent(re, in_cache)); | 150 LOG(RegExpCompileEvent(re, in_cache)); |
| 153 | 151 |
| 154 Handle<Object> result; | 152 Handle<Object> result; |
| 155 if (in_cache) { | 153 if (in_cache) { |
| (...skipping 1084 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1240 break; | 1238 break; |
| 1241 case Guard::GEQ: | 1239 case Guard::GEQ: |
| 1242 ASSERT(!trace->mentions_reg(guard->reg())); | 1240 ASSERT(!trace->mentions_reg(guard->reg())); |
| 1243 macro_assembler->IfRegisterLT(guard->reg(), | 1241 macro_assembler->IfRegisterLT(guard->reg(), |
| 1244 guard->value(), | 1242 guard->value(), |
| 1245 trace->backtrack()); | 1243 trace->backtrack()); |
| 1246 break; | 1244 break; |
| 1247 } | 1245 } |
| 1248 } | 1246 } |
| 1249 | 1247 |
| 1250 | |
| 1251 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; | |
| 1252 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; | |
| 1253 | |
| 1254 | |
| 1255 // Returns the number of characters in the equivalence class, omitting those | 1248 // Returns the number of characters in the equivalence class, omitting those |
| 1256 // that cannot occur in the source string because it is ASCII. | 1249 // that cannot occur in the source string because it is ASCII. |
| 1257 static int GetCaseIndependentLetters(uc16 character, | 1250 static int GetCaseIndependentLetters(uc16 character, |
| 1258 bool ascii_subject, | 1251 bool ascii_subject, |
| 1259 unibrow::uchar* letters) { | 1252 unibrow::uchar* letters) { |
| 1260 int length = uncanonicalize.get(character, '\0', letters); | 1253 int length = v8_context()->reg_exp_stack_data_.uncanonicalize_.get( |
| 1254 character, '\0', letters); |
| 1261 // Unibrow returns 0 or 1 for characters where case independence is | 1255 // Unibrow returns 0 or 1 for characters where case independence is |
| 1262 // trivial. | 1256 // trivial. |
| 1263 if (length == 0) { | 1257 if (length == 0) { |
| 1264 letters[0] = character; | 1258 letters[0] = character; |
| 1265 length = 1; | 1259 length = 1; |
| 1266 } | 1260 } |
| 1267 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { | 1261 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { |
| 1268 return length; | 1262 return length; |
| 1269 } | 1263 } |
| 1270 // The standard requires that non-ASCII characters cannot have ASCII | 1264 // The standard requires that non-ASCII characters cannot have ASCII |
| (...skipping 2644 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3915 } | 3909 } |
| 3916 | 3910 |
| 3917 | 3911 |
| 3918 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, | 3912 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, |
| 3919 int bottom, | 3913 int bottom, |
| 3920 int top); | 3914 int top); |
| 3921 | 3915 |
| 3922 | 3916 |
| 3923 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | 3917 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
| 3924 bool is_ascii) { | 3918 bool is_ascii) { |
| 3919 unibrow::Mapping<unibrow::Ecma262UnCanonicalize>& uncanonicalize = |
| 3920 v8_context()->reg_exp_stack_data_.uncanonicalize_; |
| 3925 uc16 bottom = from(); | 3921 uc16 bottom = from(); |
| 3926 uc16 top = to(); | 3922 uc16 top = to(); |
| 3927 if (is_ascii) { | 3923 if (is_ascii) { |
| 3928 if (bottom > String::kMaxAsciiCharCode) return; | 3924 if (bottom > String::kMaxAsciiCharCode) return; |
| 3929 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; | 3925 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; |
| 3930 } | 3926 } |
| 3931 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 3927 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 3932 if (top == bottom) { | 3928 if (top == bottom) { |
| 3933 // If this is a singleton we just expand the one character. | 3929 // If this is a singleton we just expand the one character. |
| 3934 int length = uncanonicalize.get(bottom, '\0', chars); | 3930 int length = uncanonicalize.get(bottom, '\0', chars); |
| (...skipping 16 matching lines...) Expand all Loading... |
| 3951 // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter | 3947 // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter |
| 3952 // uncanonicalizes to ['a' + k, 'A' + k]. | 3948 // uncanonicalizes to ['a' + k, 'A' + k]. |
| 3953 // Once we've found the start point we look up its uncanonicalization | 3949 // Once we've found the start point we look up its uncanonicalization |
| 3954 // and produce a range for each element. For instance for [c-f] | 3950 // and produce a range for each element. For instance for [c-f] |
| 3955 // we look up ['a', 'A'] and produce [c-f] and [C-F]. We then only | 3951 // we look up ['a', 'A'] and produce [c-f] and [C-F]. We then only |
| 3956 // add a range if it is not already contained in the input, so [c-f] | 3952 // add a range if it is not already contained in the input, so [c-f] |
| 3957 // will be skipped but [C-F] will be added. If this range is not | 3953 // will be skipped but [C-F] will be added. If this range is not |
| 3958 // completely contained in a block we do this for all the blocks | 3954 // completely contained in a block we do this for all the blocks |
| 3959 // covered by the range. | 3955 // covered by the range. |
| 3960 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 3956 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 3957 unibrow::Mapping<unibrow::CanonicalizationRange>& canonrange = |
| 3958 v8_context()->reg_exp_stack_data_.canonrange_; |
| 3961 // First, look up the block that contains the 'bottom' character. | 3959 // First, look up the block that contains the 'bottom' character. |
| 3962 int length = canonrange.get(bottom, '\0', range); | 3960 int length = canonrange.get(bottom, '\0', range); |
| 3963 if (length == 0) { | 3961 if (length == 0) { |
| 3964 range[0] = bottom; | 3962 range[0] = bottom; |
| 3965 } else { | 3963 } else { |
| 3966 ASSERT_EQ(1, length); | 3964 ASSERT_EQ(1, length); |
| 3967 } | 3965 } |
| 3968 int pos = bottom; | 3966 int pos = bottom; |
| 3969 // The start of the current block. Note that except for the first | 3967 // The start of the current block. Note that except for the first |
| 3970 // iteration 'start' is always equal to 'pos'. | 3968 // iteration 'start' is always equal to 'pos'. |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4047 // Split up very large ranges. This helps remove ranges where there are no | 4045 // Split up very large ranges. This helps remove ranges where there are no |
| 4048 // case mappings. | 4046 // case mappings. |
| 4049 for (int i = 0; i < boundary_count; i++) { | 4047 for (int i = 0; i < boundary_count; i++) { |
| 4050 if (bottom < boundaries[i] && top >= boundaries[i]) { | 4048 if (bottom < boundaries[i] && top >= boundaries[i]) { |
| 4051 AddUncanonicals(ranges, bottom, boundaries[i] - 1); | 4049 AddUncanonicals(ranges, bottom, boundaries[i] - 1); |
| 4052 AddUncanonicals(ranges, boundaries[i], top); | 4050 AddUncanonicals(ranges, boundaries[i], top); |
| 4053 return; | 4051 return; |
| 4054 } | 4052 } |
| 4055 } | 4053 } |
| 4056 | 4054 |
| 4055 unibrow::Mapping<unibrow::Ecma262UnCanonicalize>& uncanonicalize = |
| 4056 v8_context()->reg_exp_stack_data_.uncanonicalize_; |
| 4057 |
| 4057 // If we are completely in a zone with no case mappings then we are done. | 4058 // If we are completely in a zone with no case mappings then we are done. |
| 4058 // We start at 2 so as not to except the ASCII range from mappings. | 4059 // We start at 2 so as not to except the ASCII range from mappings. |
| 4059 for (int i = kFirstRealCaselessZoneIndex; i < boundary_count; i += 2) { | 4060 for (int i = kFirstRealCaselessZoneIndex; i < boundary_count; i += 2) { |
| 4060 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { | 4061 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { |
| 4061 #ifdef DEBUG | 4062 #ifdef DEBUG |
| 4062 for (int j = bottom; j <= top; j++) { | 4063 for (int j = bottom; j <= top; j++) { |
| 4063 unsigned current_char = j; | 4064 unsigned current_char = j; |
| 4064 int length = uncanonicalize.get(current_char, '\0', chars); | 4065 int length = uncanonicalize.get(current_char, '\0', chars); |
| 4065 for (int k = 0; k < length; k++) { | 4066 for (int k = 0; k < length; k++) { |
| 4066 ASSERT(chars[k] == current_char); | 4067 ASSERT(chars[k] == current_char); |
| (...skipping 533 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4600 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4601 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
| 4601 #endif | 4602 #endif |
| 4602 | 4603 |
| 4603 return compiler.Assemble(&macro_assembler, | 4604 return compiler.Assemble(&macro_assembler, |
| 4604 node, | 4605 node, |
| 4605 data->capture_count, | 4606 data->capture_count, |
| 4606 pattern); | 4607 pattern); |
| 4607 } | 4608 } |
| 4608 | 4609 |
| 4609 }} // namespace v8::internal | 4610 }} // namespace v8::internal |
| OLD | NEW |