| OLD | NEW |
| 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 17 matching lines...) |
| 28 #include "v8.h" | 28 #include "v8.h" |
| 29 | 29 |
| 30 #include "ast.h" | 30 #include "ast.h" |
| 31 #include "compiler.h" | 31 #include "compiler.h" |
| 32 #include "execution.h" | 32 #include "execution.h" |
| 33 #include "factory.h" | 33 #include "factory.h" |
| 34 #include "jsregexp.h" | 34 #include "jsregexp.h" |
| 35 #include "platform.h" | 35 #include "platform.h" |
| 36 #include "string-search.h" | 36 #include "string-search.h" |
| 37 #include "runtime.h" | 37 #include "runtime.h" |
| 38 #include "top.h" | |
| 39 #include "compilation-cache.h" | 38 #include "compilation-cache.h" |
| 40 #include "string-stream.h" | 39 #include "string-stream.h" |
| 41 #include "parser.h" | 40 #include "parser.h" |
| 42 #include "regexp-macro-assembler.h" | 41 #include "regexp-macro-assembler.h" |
| 43 #include "regexp-macro-assembler-tracer.h" | 42 #include "regexp-macro-assembler-tracer.h" |
| 44 #include "regexp-macro-assembler-irregexp.h" | 43 #include "regexp-macro-assembler-irregexp.h" |
| 45 #include "regexp-stack.h" | 44 #include "regexp-stack.h" |
| 46 | 45 |
| 47 #ifndef V8_INTERPRETED_REGEXP | 46 #ifndef V8_INTERPRETED_REGEXP |
| 48 #if V8_TARGET_ARCH_IA32 | 47 #if V8_TARGET_ARCH_IA32 |
| 49 #include "ia32/regexp-macro-assembler-ia32.h" | 48 #include "ia32/regexp-macro-assembler-ia32.h" |
| 50 #elif V8_TARGET_ARCH_X64 | 49 #elif V8_TARGET_ARCH_X64 |
| 51 #include "x64/regexp-macro-assembler-x64.h" | 50 #include "x64/regexp-macro-assembler-x64.h" |
| 52 #elif V8_TARGET_ARCH_ARM | 51 #elif V8_TARGET_ARCH_ARM |
| 53 #include "arm/regexp-macro-assembler-arm.h" | 52 #include "arm/regexp-macro-assembler-arm.h" |
| 54 #else | 53 #else |
| 55 #error Unsupported target architecture. | 54 #error Unsupported target architecture. |
| 56 #endif | 55 #endif |
| 57 #endif | 56 #endif |
| 58 | 57 |
| 59 #include "interpreter-irregexp.h" | 58 #include "interpreter-irregexp.h" |
| 60 | 59 |
| 61 | 60 |
| 62 namespace v8 { | 61 namespace v8 { |
| 63 namespace internal { | 62 namespace internal { |
| 64 | 63 |
| 65 | |
| 66 Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor, | 64 Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor, |
| 67 Handle<String> pattern, | 65 Handle<String> pattern, |
| 68 Handle<String> flags, | 66 Handle<String> flags, |
| 69 bool* has_pending_exception) { | 67 bool* has_pending_exception) { |
| 70 // Call the construct code with 2 arguments. | 68 // Call the construct code with 2 arguments. |
| 71 Object** argv[2] = { Handle<Object>::cast(pattern).location(), | 69 Object** argv[2] = { Handle<Object>::cast(pattern).location(), |
| 72 Handle<Object>::cast(flags).location() }; | 70 Handle<Object>::cast(flags).location() }; |
| 73 return Execution::New(constructor, 2, argv, has_pending_exception); | 71 return Execution::New(constructor, 2, argv, has_pending_exception); |
| 74 } | 72 } |
| 75 | 73 |
| (...skipping 14 matching lines...) |
| 90 } | 88 } |
| 91 } | 89 } |
| 92 return JSRegExp::Flags(flags); | 90 return JSRegExp::Flags(flags); |
| 93 } | 91 } |
| 94 | 92 |
| 95 | 93 |
| 96 static inline void ThrowRegExpException(Handle<JSRegExp> re, | 94 static inline void ThrowRegExpException(Handle<JSRegExp> re, |
| 97 Handle<String> pattern, | 95 Handle<String> pattern, |
| 98 Handle<String> error_text, | 96 Handle<String> error_text, |
| 99 const char* message) { | 97 const char* message) { |
| 100 Handle<FixedArray> elements = Factory::NewFixedArray(2); | 98 Isolate* isolate = re->GetIsolate(); |
| 99 Factory* factory = isolate->factory(); |
| 100 Handle<FixedArray> elements = factory->NewFixedArray(2); |
| 101 elements->set(0, *pattern); | 101 elements->set(0, *pattern); |
| 102 elements->set(1, *error_text); | 102 elements->set(1, *error_text); |
| 103 Handle<JSArray> array = Factory::NewJSArrayWithElements(elements); | 103 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); |
| 104 Handle<Object> regexp_err = Factory::NewSyntaxError(message, array); | 104 Handle<Object> regexp_err = factory->NewSyntaxError(message, array); |
| 105 Top::Throw(*regexp_err); | 105 isolate->Throw(*regexp_err); |
| 106 } | 106 } |
| 107 | 107 |
| 108 | 108 |
| 109 // Generic RegExp methods. Dispatches to implementation specific methods. | 109 // Generic RegExp methods. Dispatches to implementation specific methods. |
| 110 | 110 |
| 111 | 111 |
| 112 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, | 112 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, |
| 113 Handle<String> pattern, | 113 Handle<String> pattern, |
| 114 Handle<String> flag_str) { | 114 Handle<String> flag_str) { |
| 115 Isolate* isolate = re->GetIsolate(); |
| 115 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); | 116 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); |
| 116 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); | 117 CompilationCache* compilation_cache = isolate->compilation_cache(); |
| 118 Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags); |
| 117 bool in_cache = !cached.is_null(); | 119 bool in_cache = !cached.is_null(); |
| 118 LOG(RegExpCompileEvent(re, in_cache)); | 120 LOG(isolate, RegExpCompileEvent(re, in_cache)); |
| 119 | 121 |
| 120 Handle<Object> result; | 122 Handle<Object> result; |
| 121 if (in_cache) { | 123 if (in_cache) { |
| 122 re->set_data(*cached); | 124 re->set_data(*cached); |
| 123 return re; | 125 return re; |
| 124 } | 126 } |
| 125 pattern = FlattenGetString(pattern); | 127 pattern = FlattenGetString(pattern); |
| 126 CompilationZoneScope zone_scope(DELETE_ON_EXIT); | 128 CompilationZoneScope zone_scope(DELETE_ON_EXIT); |
| 127 PostponeInterruptsScope postpone; | 129 PostponeInterruptsScope postpone(isolate); |
| 128 RegExpCompileData parse_result; | 130 RegExpCompileData parse_result; |
| 129 FlatStringReader reader(pattern); | 131 FlatStringReader reader(isolate, pattern); |
| 130 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), | 132 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), |
| 131 &parse_result)) { | 133 &parse_result)) { |
| 132 // Throw an exception if we fail to parse the pattern. | 134 // Throw an exception if we fail to parse the pattern. |
| 133 ThrowRegExpException(re, | 135 ThrowRegExpException(re, |
| 134 pattern, | 136 pattern, |
| 135 parse_result.error, | 137 parse_result.error, |
| 136 "malformed_regexp"); | 138 "malformed_regexp"); |
| 137 return Handle<Object>::null(); | 139 return Handle<Object>::null(); |
| 138 } | 140 } |
| 139 | 141 |
| 140 if (parse_result.simple && !flags.is_ignore_case()) { | 142 if (parse_result.simple && !flags.is_ignore_case()) { |
| 141 // Parse-tree is a single atom that is equal to the pattern. | 143 // Parse-tree is a single atom that is equal to the pattern. |
| 142 AtomCompile(re, pattern, flags, pattern); | 144 AtomCompile(re, pattern, flags, pattern); |
| 143 } else if (parse_result.tree->IsAtom() && | 145 } else if (parse_result.tree->IsAtom() && |
| 144 !flags.is_ignore_case() && | 146 !flags.is_ignore_case() && |
| 145 parse_result.capture_count == 0) { | 147 parse_result.capture_count == 0) { |
| 146 RegExpAtom* atom = parse_result.tree->AsAtom(); | 148 RegExpAtom* atom = parse_result.tree->AsAtom(); |
| 147 Vector<const uc16> atom_pattern = atom->data(); | 149 Vector<const uc16> atom_pattern = atom->data(); |
| 148 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); | 150 Handle<String> atom_string = |
| 151 isolate->factory()->NewStringFromTwoByte(atom_pattern); |
| 149 AtomCompile(re, pattern, flags, atom_string); | 152 AtomCompile(re, pattern, flags, atom_string); |
| 150 } else { | 153 } else { |
| 151 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); | 154 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); |
| 152 } | 155 } |
| 153 ASSERT(re->data()->IsFixedArray()); | 156 ASSERT(re->data()->IsFixedArray()); |
| 154 // Compilation succeeded so the data is set on the regexp | 157 // Compilation succeeded so the data is set on the regexp |
| 155 // and we can store it in the cache. | 158 // and we can store it in the cache. |
| 156 Handle<FixedArray> data(FixedArray::cast(re->data())); | 159 Handle<FixedArray> data(FixedArray::cast(re->data())); |
| 157 CompilationCache::PutRegExp(pattern, flags, data); | 160 compilation_cache->PutRegExp(pattern, flags, data); |
| 158 | 161 |
| 159 return re; | 162 return re; |
| 160 } | 163 } |
| 161 | 164 |
| 162 | 165 |
| 163 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, | 166 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
| 164 Handle<String> subject, | 167 Handle<String> subject, |
| 165 int index, | 168 int index, |
| 166 Handle<JSArray> last_match_info) { | 169 Handle<JSArray> last_match_info) { |
| 167 switch (regexp->TypeTag()) { | 170 switch (regexp->TypeTag()) { |
| 168 case JSRegExp::ATOM: | 171 case JSRegExp::ATOM: |
| 169 return AtomExec(regexp, subject, index, last_match_info); | 172 return AtomExec(regexp, subject, index, last_match_info); |
| 170 case JSRegExp::IRREGEXP: { | 173 case JSRegExp::IRREGEXP: { |
| 171 Handle<Object> result = | 174 Handle<Object> result = |
| 172 IrregexpExec(regexp, subject, index, last_match_info); | 175 IrregexpExec(regexp, subject, index, last_match_info); |
| 173 ASSERT(!result.is_null() || Top::has_pending_exception()); | 176 ASSERT(!result.is_null() || Isolate::Current()->has_pending_exception()); |
| 174 return result; | 177 return result; |
| 175 } | 178 } |
| 176 default: | 179 default: |
| 177 UNREACHABLE(); | 180 UNREACHABLE(); |
| 178 return Handle<Object>::null(); | 181 return Handle<Object>::null(); |
| 179 } | 182 } |
| 180 } | 183 } |
| 181 | 184 |
| 182 | 185 |
| 183 // RegExp Atom implementation: Simple string search using indexOf. | 186 // RegExp Atom implementation: Simple string search using indexOf. |
| 184 | 187 |
| 185 | 188 |
| 186 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, | 189 void RegExpImpl::AtomCompile(Handle<JSRegExp> re, |
| 187 Handle<String> pattern, | 190 Handle<String> pattern, |
| 188 JSRegExp::Flags flags, | 191 JSRegExp::Flags flags, |
| 189 Handle<String> match_pattern) { | 192 Handle<String> match_pattern) { |
| 190 Factory::SetRegExpAtomData(re, | 193 re->GetIsolate()->factory()->SetRegExpAtomData(re, |
| 191 JSRegExp::ATOM, | 194 JSRegExp::ATOM, |
| 192 pattern, | 195 pattern, |
| 193 flags, | 196 flags, |
| 194 match_pattern); | 197 match_pattern); |
| 195 } | 198 } |
| 196 | 199 |
| 197 | 200 |
| 198 static void SetAtomLastCapture(FixedArray* array, | 201 static void SetAtomLastCapture(FixedArray* array, |
| 199 String* subject, | 202 String* subject, |
| 200 int from, | 203 int from, |
| 201 int to) { | 204 int to) { |
| 202 NoHandleAllocation no_handles; | 205 NoHandleAllocation no_handles; |
| 203 RegExpImpl::SetLastCaptureCount(array, 2); | 206 RegExpImpl::SetLastCaptureCount(array, 2); |
| 204 RegExpImpl::SetLastSubject(array, subject); | 207 RegExpImpl::SetLastSubject(array, subject); |
| (...skipping 12 matching lines...) |
| 217 | 220 |
| 218 int subject_length = sub_vector.length(); | 221 int subject_length = sub_vector.length(); |
| 219 if (start_index + pattern_length > subject_length) return -1; | 222 if (start_index + pattern_length > subject_length) return -1; |
| 220 return SearchString(sub_vector, pat_vector, start_index); | 223 return SearchString(sub_vector, pat_vector, start_index); |
| 221 } | 224 } |
| 222 */ | 225 */ |
| 223 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, | 226 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
| 224 Handle<String> subject, | 227 Handle<String> subject, |
| 225 int index, | 228 int index, |
| 226 Handle<JSArray> last_match_info) { | 229 Handle<JSArray> last_match_info) { |
| 230 Isolate* isolate = re->GetIsolate(); |
| 231 |
| 227 ASSERT(0 <= index); | 232 ASSERT(0 <= index); |
| 228 ASSERT(index <= subject->length()); | 233 ASSERT(index <= subject->length()); |
| 229 | 234 |
| 230 if (!subject->IsFlat()) FlattenString(subject); | 235 if (!subject->IsFlat()) FlattenString(subject); |
| 231 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 236 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
| 232 // Extract flattened substrings of cons strings before determining asciiness. | 237 // Extract flattened substrings of cons strings before determining asciiness. |
| 233 String* seq_sub = *subject; | 238 String* seq_sub = *subject; |
| 234 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); | 239 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); |
| 235 | 240 |
| 236 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); | 241 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); |
| 237 int needle_len = needle->length(); | 242 int needle_len = needle->length(); |
| 238 | 243 |
| 239 if (needle_len != 0) { | 244 if (needle_len != 0) { |
| 240 if (index + needle_len > subject->length()) return Factory::null_value(); | 245 if (index + needle_len > subject->length()) |
| 246 return isolate->factory()->null_value(); |
| 247 |
| 241 // dispatch on type of strings | 248 // dispatch on type of strings |
| 242 index = (needle->IsAsciiRepresentation() | 249 index = (needle->IsAsciiRepresentation() |
| 243 ? (seq_sub->IsAsciiRepresentation() | 250 ? (seq_sub->IsAsciiRepresentation() |
| 244 ? SearchString(seq_sub->ToAsciiVector(), | 251 ? SearchString(isolate, |
| 252 seq_sub->ToAsciiVector(), |
| 245 needle->ToAsciiVector(), | 253 needle->ToAsciiVector(), |
| 246 index) | 254 index) |
| 247 : SearchString(seq_sub->ToUC16Vector(), | 255 : SearchString(isolate, |
| 256 seq_sub->ToUC16Vector(), |
| 248 needle->ToAsciiVector(), | 257 needle->ToAsciiVector(), |
| 249 index)) | 258 index)) |
| 250 : (seq_sub->IsAsciiRepresentation() | 259 : (seq_sub->IsAsciiRepresentation() |
| 251 ? SearchString(seq_sub->ToAsciiVector(), | 260 ? SearchString(isolate, |
| 261 seq_sub->ToAsciiVector(), |
| 252 needle->ToUC16Vector(), | 262 needle->ToUC16Vector(), |
| 253 index) | 263 index) |
| 254 : SearchString(seq_sub->ToUC16Vector(), | 264 : SearchString(isolate, |
| 265 seq_sub->ToUC16Vector(), |
| 255 needle->ToUC16Vector(), | 266 needle->ToUC16Vector(), |
| 256 index))); | 267 index))); |
| 257 if (index == -1) return Factory::null_value(); | 268 if (index == -1) return FACTORY->null_value(); |
| 258 } | 269 } |
| 259 ASSERT(last_match_info->HasFastElements()); | 270 ASSERT(last_match_info->HasFastElements()); |
| 260 | 271 |
| 261 { | 272 { |
| 262 NoHandleAllocation no_handles; | 273 NoHandleAllocation no_handles; |
| 263 FixedArray* array = FixedArray::cast(last_match_info->elements()); | 274 FixedArray* array = FixedArray::cast(last_match_info->elements()); |
| 264 SetAtomLastCapture(array, *subject, index, index + needle_len); | 275 SetAtomLastCapture(array, *subject, index, index + needle_len); |
| 265 } | 276 } |
| 266 return last_match_info; | 277 return last_match_info; |
| 267 } | 278 } |
| (...skipping 13 matching lines...) |
| 281 if (compiled_code->IsByteArray()) return true; | 292 if (compiled_code->IsByteArray()) return true; |
| 282 #else // V8_INTERPRETED_REGEXP (RegExp native code) | 293 #else // V8_INTERPRETED_REGEXP (RegExp native code) |
| 283 if (compiled_code->IsCode()) return true; | 294 if (compiled_code->IsCode()) return true; |
| 284 #endif | 295 #endif |
| 285 return CompileIrregexp(re, is_ascii); | 296 return CompileIrregexp(re, is_ascii); |
| 286 } | 297 } |
| 287 | 298 |
| 288 | 299 |
| 289 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) { | 300 bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) { |
| 290 // Compile the RegExp. | 301 // Compile the RegExp. |
| 302 Isolate* isolate = re->GetIsolate(); |
| 291 CompilationZoneScope zone_scope(DELETE_ON_EXIT); | 303 CompilationZoneScope zone_scope(DELETE_ON_EXIT); |
| 292 PostponeInterruptsScope postpone; | 304 PostponeInterruptsScope postpone(isolate); |
| 293 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); | 305 Object* entry = re->DataAt(JSRegExp::code_index(is_ascii)); |
| 294 if (entry->IsJSObject()) { | 306 if (entry->IsJSObject()) { |
| 295 // If it's a JSObject, a previous compilation failed and threw this object. | 307 // If it's a JSObject, a previous compilation failed and threw this object. |
| 296 // Re-throw the object without trying again. | 308 // Re-throw the object without trying again. |
| 297 Top::Throw(entry); | 309 isolate->Throw(entry); |
| 298 return false; | 310 return false; |
| 299 } | 311 } |
| 300 ASSERT(entry->IsTheHole()); | 312 ASSERT(entry->IsTheHole()); |
| 301 | 313 |
| 302 JSRegExp::Flags flags = re->GetFlags(); | 314 JSRegExp::Flags flags = re->GetFlags(); |
| 303 | 315 |
| 304 Handle<String> pattern(re->Pattern()); | 316 Handle<String> pattern(re->Pattern()); |
| 305 if (!pattern->IsFlat()) { | 317 if (!pattern->IsFlat()) { |
| 306 FlattenString(pattern); | 318 FlattenString(pattern); |
| 307 } | 319 } |
| 308 | 320 |
| 309 RegExpCompileData compile_data; | 321 RegExpCompileData compile_data; |
| 310 FlatStringReader reader(pattern); | 322 FlatStringReader reader(isolate, pattern); |
| 311 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), | 323 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), |
| 312 &compile_data)) { | 324 &compile_data)) { |
| 313 // Throw an exception if we fail to parse the pattern. | 325 // Throw an exception if we fail to parse the pattern. |
| 314 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. | 326 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. |
| 315 ThrowRegExpException(re, | 327 ThrowRegExpException(re, |
| 316 pattern, | 328 pattern, |
| 317 compile_data.error, | 329 compile_data.error, |
| 318 "malformed_regexp"); | 330 "malformed_regexp"); |
| 319 return false; | 331 return false; |
| 320 } | 332 } |
| 321 RegExpEngine::CompilationResult result = | 333 RegExpEngine::CompilationResult result = |
| 322 RegExpEngine::Compile(&compile_data, | 334 RegExpEngine::Compile(&compile_data, |
| 323 flags.is_ignore_case(), | 335 flags.is_ignore_case(), |
| 324 flags.is_multiline(), | 336 flags.is_multiline(), |
| 325 pattern, | 337 pattern, |
| 326 is_ascii); | 338 is_ascii); |
| 327 if (result.error_message != NULL) { | 339 if (result.error_message != NULL) { |
| 328 // Unable to compile regexp. | 340 // Unable to compile regexp. |
| 329 Handle<FixedArray> elements = Factory::NewFixedArray(2); | 341 Factory* factory = isolate->factory(); |
| 342 Handle<FixedArray> elements = factory->NewFixedArray(2); |
| 330 elements->set(0, *pattern); | 343 elements->set(0, *pattern); |
| 331 Handle<String> error_message = | 344 Handle<String> error_message = |
| 332 Factory::NewStringFromUtf8(CStrVector(result.error_message)); | 345 factory->NewStringFromUtf8(CStrVector(result.error_message)); |
| 333 elements->set(1, *error_message); | 346 elements->set(1, *error_message); |
| 334 Handle<JSArray> array = Factory::NewJSArrayWithElements(elements); | 347 Handle<JSArray> array = factory->NewJSArrayWithElements(elements); |
| 335 Handle<Object> regexp_err = | 348 Handle<Object> regexp_err = |
| 336 Factory::NewSyntaxError("malformed_regexp", array); | 349 factory->NewSyntaxError("malformed_regexp", array); |
| 337 Top::Throw(*regexp_err); | 350 isolate->Throw(*regexp_err); |
| 338 re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err); | 351 re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err); |
| 339 return false; | 352 return false; |
| 340 } | 353 } |
| 341 | 354 |
| 342 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); | 355 Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); |
| 343 data->set(JSRegExp::code_index(is_ascii), result.code); | 356 data->set(JSRegExp::code_index(is_ascii), result.code); |
| 344 int register_max = IrregexpMaxRegisterCount(*data); | 357 int register_max = IrregexpMaxRegisterCount(*data); |
| 345 if (result.num_registers > register_max) { | 358 if (result.num_registers > register_max) { |
| 346 SetIrregexpMaxRegisterCount(*data, result.num_registers); | 359 SetIrregexpMaxRegisterCount(*data, result.num_registers); |
| 347 } | 360 } |
| (...skipping 31 matching lines...) |
| 379 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { | 392 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { |
| 380 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); | 393 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); |
| 381 } | 394 } |
| 382 | 395 |
| 383 | 396 |
| 384 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, | 397 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, |
| 385 Handle<String> pattern, | 398 Handle<String> pattern, |
| 386 JSRegExp::Flags flags, | 399 JSRegExp::Flags flags, |
| 387 int capture_count) { | 400 int capture_count) { |
| 388 // Initialize compiled code entries to null. | 401 // Initialize compiled code entries to null. |
| 389 Factory::SetRegExpIrregexpData(re, | 402 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, |
| 390 JSRegExp::IRREGEXP, | 403 JSRegExp::IRREGEXP, |
| 391 pattern, | 404 pattern, |
| 392 flags, | 405 flags, |
| 393 capture_count); | 406 capture_count); |
| 394 } | 407 } |
| 395 | 408 |
| 396 | 409 |
| 397 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, | 410 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
| 398 Handle<String> subject) { | 411 Handle<String> subject) { |
| 399 if (!subject->IsFlat()) { | 412 if (!subject->IsFlat()) { |
| 400 FlattenString(subject); | 413 FlattenString(subject); |
| 401 } | 414 } |
| 402 // Check the asciiness of the underlying storage. | 415 // Check the asciiness of the underlying storage. |
| 403 bool is_ascii; | 416 bool is_ascii; |
| (...skipping 17 matching lines...) |
| 421 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | 434 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
| 422 #endif // V8_INTERPRETED_REGEXP | 435 #endif // V8_INTERPRETED_REGEXP |
| 423 } | 436 } |
| 424 | 437 |
| 425 | 438 |
| 426 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( | 439 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( |
| 427 Handle<JSRegExp> regexp, | 440 Handle<JSRegExp> regexp, |
| 428 Handle<String> subject, | 441 Handle<String> subject, |
| 429 int index, | 442 int index, |
| 430 Vector<int> output) { | 443 Vector<int> output) { |
| 431 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); | 444 Isolate* isolate = regexp->GetIsolate(); |
| 445 |
| 446 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); |
| 432 | 447 |
| 433 ASSERT(index >= 0); | 448 ASSERT(index >= 0); |
| 434 ASSERT(index <= subject->length()); | 449 ASSERT(index <= subject->length()); |
| 435 ASSERT(subject->IsFlat()); | 450 ASSERT(subject->IsFlat()); |
| 436 | 451 |
| 437 // A flat ASCII string might have a two-byte first part. | 452 // A flat ASCII string might have a two-byte first part. |
| 438 if (subject->IsConsString()) { | 453 if (subject->IsConsString()) { |
| 439 subject = Handle<String>(ConsString::cast(*subject)->first()); | 454 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate); |
| 440 } | 455 } |
| 441 | 456 |
| 442 #ifndef V8_INTERPRETED_REGEXP | 457 #ifndef V8_INTERPRETED_REGEXP |
| 443 ASSERT(output.length() >= | 458 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
| 444 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); | |
| 445 do { | 459 do { |
| 446 bool is_ascii = subject->IsAsciiRepresentation(); | 460 bool is_ascii = subject->IsAsciiRepresentation(); |
| 447 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii)); | 461 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); |
| 448 NativeRegExpMacroAssembler::Result res = | 462 NativeRegExpMacroAssembler::Result res = |
| 449 NativeRegExpMacroAssembler::Match(code, | 463 NativeRegExpMacroAssembler::Match(code, |
| 450 subject, | 464 subject, |
| 451 output.start(), | 465 output.start(), |
| 452 output.length(), | 466 output.length(), |
| 453 index); | 467 index, |
| 468 isolate); |
| 454 if (res != NativeRegExpMacroAssembler::RETRY) { | 469 if (res != NativeRegExpMacroAssembler::RETRY) { |
| 455 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || | 470 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
| 456 Top::has_pending_exception()); | 471 isolate->has_pending_exception()); |
| 457 STATIC_ASSERT( | 472 STATIC_ASSERT( |
| 458 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); | 473 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); |
| 459 STATIC_ASSERT( | 474 STATIC_ASSERT( |
| 460 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); | 475 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); |
| 461 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) | 476 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) |
| 462 == RE_EXCEPTION); | 477 == RE_EXCEPTION); |
| 463 return static_cast<IrregexpResult>(res); | 478 return static_cast<IrregexpResult>(res); |
| 464 } | 479 } |
| 465 // If result is RETRY, the string has changed representation, and we | 480 // If result is RETRY, the string has changed representation, and we |
| 466 // must restart from scratch. | 481 // must restart from scratch. |
| (...skipping 10 matching lines...) |
| 477 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); | 492 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); |
| 478 bool is_ascii = subject->IsAsciiRepresentation(); | 493 bool is_ascii = subject->IsAsciiRepresentation(); |
| 479 // We must have done EnsureCompiledIrregexp, so we can get the number of | 494 // We must have done EnsureCompiledIrregexp, so we can get the number of |
| 480 // registers. | 495 // registers. |
| 481 int* register_vector = output.start(); | 496 int* register_vector = output.start(); |
| 482 int number_of_capture_registers = | 497 int number_of_capture_registers = |
| 483 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; | 498 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
| 484 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | 499 for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
| 485 register_vector[i] = -1; | 500 register_vector[i] = -1; |
| 486 } | 501 } |
| 487 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii)); | 502 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); |
| 488 | 503 |
| 489 if (IrregexpInterpreter::Match(byte_codes, | 504 if (IrregexpInterpreter::Match(byte_codes, |
| 490 subject, | 505 subject, |
| 491 register_vector, | 506 register_vector, |
| 492 index)) { | 507 index)) { |
| 493 return RE_SUCCESS; | 508 return RE_SUCCESS; |
| 494 } | 509 } |
| 495 return RE_FAILURE; | 510 return RE_FAILURE; |
| 496 #endif // V8_INTERPRETED_REGEXP | 511 #endif // V8_INTERPRETED_REGEXP |
| 497 } | 512 } |
| (...skipping 11 matching lines...) |
| 509 if (FLAG_trace_regexp_bytecodes) { | 524 if (FLAG_trace_regexp_bytecodes) { |
| 510 String* pattern = jsregexp->Pattern(); | 525 String* pattern = jsregexp->Pattern(); |
| 511 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 526 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| 512 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 527 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| 513 } | 528 } |
| 514 #endif | 529 #endif |
| 515 #endif | 530 #endif |
| 516 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); | 531 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); |
| 517 if (required_registers < 0) { | 532 if (required_registers < 0) { |
| 518 // Compiling failed with an exception. | 533 // Compiling failed with an exception. |
| 519 ASSERT(Top::has_pending_exception()); | 534 ASSERT(Isolate::Current()->has_pending_exception()); |
| 520 return Handle<Object>::null(); | 535 return Handle<Object>::null(); |
| 521 } | 536 } |
| 522 | 537 |
| 523 OffsetsVector registers(required_registers); | 538 OffsetsVector registers(required_registers); |
| 524 | 539 |
| 525 IrregexpResult res = RegExpImpl::IrregexpExecOnce( | 540 IrregexpResult res = RegExpImpl::IrregexpExecOnce( |
| 526 jsregexp, subject, previous_index, Vector<int>(registers.vector(), | 541 jsregexp, subject, previous_index, Vector<int>(registers.vector(), |
| 527 registers.length())); | 542 registers.length())); |
| 528 if (res == RE_SUCCESS) { | 543 if (res == RE_SUCCESS) { |
| 529 int capture_register_count = | 544 int capture_register_count = |
| 530 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; | 545 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
| 531 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); | 546 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); |
| 532 AssertNoAllocation no_gc; | 547 AssertNoAllocation no_gc; |
| 533 int* register_vector = registers.vector(); | 548 int* register_vector = registers.vector(); |
| 534 FixedArray* array = FixedArray::cast(last_match_info->elements()); | 549 FixedArray* array = FixedArray::cast(last_match_info->elements()); |
| 535 for (int i = 0; i < capture_register_count; i += 2) { | 550 for (int i = 0; i < capture_register_count; i += 2) { |
| 536 SetCapture(array, i, register_vector[i]); | 551 SetCapture(array, i, register_vector[i]); |
| 537 SetCapture(array, i + 1, register_vector[i + 1]); | 552 SetCapture(array, i + 1, register_vector[i + 1]); |
| 538 } | 553 } |
| 539 SetLastCaptureCount(array, capture_register_count); | 554 SetLastCaptureCount(array, capture_register_count); |
| 540 SetLastSubject(array, *subject); | 555 SetLastSubject(array, *subject); |
| 541 SetLastInput(array, *subject); | 556 SetLastInput(array, *subject); |
| 542 return last_match_info; | 557 return last_match_info; |
| 543 } | 558 } |
| 544 if (res == RE_EXCEPTION) { | 559 if (res == RE_EXCEPTION) { |
| 545 ASSERT(Top::has_pending_exception()); | 560 ASSERT(Isolate::Current()->has_pending_exception()); |
| 546 return Handle<Object>::null(); | 561 return Handle<Object>::null(); |
| 547 } | 562 } |
| 548 ASSERT(res == RE_FAILURE); | 563 ASSERT(res == RE_FAILURE); |
| 549 return Factory::null_value(); | 564 return Isolate::Current()->factory()->null_value(); |
| 550 } | 565 } |
| 551 | 566 |
| 552 | 567 |
| 553 // ------------------------------------------------------------------- | 568 // ------------------------------------------------------------------- |
| 554 // Implementation of the Irregexp regular expression engine. | 569 // Implementation of the Irregexp regular expression engine. |
| 555 // | 570 // |
| 556 // The Irregexp regular expression engine is intended to be a complete | 571 // The Irregexp regular expression engine is intended to be a complete |
| 557 // implementation of ECMAScript regular expressions. It generates either | 572 // implementation of ECMAScript regular expressions. It generates either |
| 558 // bytecodes or native code. | 573 // bytecodes or native code. |
| 559 | 574 |
| (...skipping 739 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1299 case Guard::GEQ: | 1314 case Guard::GEQ: |
| 1300 ASSERT(!trace->mentions_reg(guard->reg())); | 1315 ASSERT(!trace->mentions_reg(guard->reg())); |
| 1301 macro_assembler->IfRegisterLT(guard->reg(), | 1316 macro_assembler->IfRegisterLT(guard->reg(), |
| 1302 guard->value(), | 1317 guard->value(), |
| 1303 trace->backtrack()); | 1318 trace->backtrack()); |
| 1304 break; | 1319 break; |
| 1305 } | 1320 } |
| 1306 } | 1321 } |
| 1307 | 1322 |
| 1308 | 1323 |
| 1309 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; | |
| 1310 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; | |
| 1311 | |
| 1312 | |
| 1313 // Returns the number of characters in the equivalence class, omitting those | 1324 // Returns the number of characters in the equivalence class, omitting those |
| 1314 // that cannot occur in the source string because it is ASCII. | 1325 // that cannot occur in the source string because it is ASCII. |
| 1315 static int GetCaseIndependentLetters(uc16 character, | 1326 static int GetCaseIndependentLetters(Isolate* isolate, |
| 1327 uc16 character, |
| 1316 bool ascii_subject, | 1328 bool ascii_subject, |
| 1317 unibrow::uchar* letters) { | 1329 unibrow::uchar* letters) { |
| 1318 int length = uncanonicalize.get(character, '\0', letters); | 1330 int length = |
| 1331 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); |
| 1319 // Unibrow returns 0 or 1 for characters where case independence is | 1332 // Unibrow returns 0 or 1 for characters where case independence is |
| 1320 // trivial. | 1333 // trivial. |
| 1321 if (length == 0) { | 1334 if (length == 0) { |
| 1322 letters[0] = character; | 1335 letters[0] = character; |
| 1323 length = 1; | 1336 length = 1; |
| 1324 } | 1337 } |
| 1325 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { | 1338 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { |
| 1326 return length; | 1339 return length; |
| 1327 } | 1340 } |
| 1328 // The standard requires that non-ASCII characters cannot have ASCII | 1341 // The standard requires that non-ASCII characters cannot have ASCII |
| 1329 // character codes in their equivalence class. | 1342 // character codes in their equivalence class. |
| 1330 return 0; | 1343 return 0; |
| 1331 } | 1344 } |
| 1332 | 1345 |
| 1333 | 1346 |
| 1334 static inline bool EmitSimpleCharacter(RegExpCompiler* compiler, | 1347 static inline bool EmitSimpleCharacter(Isolate* isolate, |
| 1348 RegExpCompiler* compiler, |
| 1335 uc16 c, | 1349 uc16 c, |
| 1336 Label* on_failure, | 1350 Label* on_failure, |
| 1337 int cp_offset, | 1351 int cp_offset, |
| 1338 bool check, | 1352 bool check, |
| 1339 bool preloaded) { | 1353 bool preloaded) { |
| 1340 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 1354 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 1341 bool bound_checked = false; | 1355 bool bound_checked = false; |
| 1342 if (!preloaded) { | 1356 if (!preloaded) { |
| 1343 assembler->LoadCurrentCharacter( | 1357 assembler->LoadCurrentCharacter( |
| 1344 cp_offset, | 1358 cp_offset, |
| 1345 on_failure, | 1359 on_failure, |
| 1346 check); | 1360 check); |
| 1347 bound_checked = true; | 1361 bound_checked = true; |
| 1348 } | 1362 } |
| 1349 assembler->CheckNotCharacter(c, on_failure); | 1363 assembler->CheckNotCharacter(c, on_failure); |
| 1350 return bound_checked; | 1364 return bound_checked; |
| 1351 } | 1365 } |
| 1352 | 1366 |
| 1353 | 1367 |
| 1354 // Only emits non-letters (things that don't have case). Only used for case | 1368 // Only emits non-letters (things that don't have case). Only used for case |
| 1355 // independent matches. | 1369 // independent matches. |
| 1356 static inline bool EmitAtomNonLetter(RegExpCompiler* compiler, | 1370 static inline bool EmitAtomNonLetter(Isolate* isolate, |
| 1371 RegExpCompiler* compiler, |
| 1357 uc16 c, | 1372 uc16 c, |
| 1358 Label* on_failure, | 1373 Label* on_failure, |
| 1359 int cp_offset, | 1374 int cp_offset, |
| 1360 bool check, | 1375 bool check, |
| 1361 bool preloaded) { | 1376 bool preloaded) { |
| 1362 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1377 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 1363 bool ascii = compiler->ascii(); | 1378 bool ascii = compiler->ascii(); |
| 1364 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1379 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1365 int length = GetCaseIndependentLetters(c, ascii, chars); | 1380 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); |
| 1366 if (length < 1) { | 1381 if (length < 1) { |
| 1367 // This can't match. Must be an ASCII subject and a non-ASCII character. | 1382 // This can't match. Must be an ASCII subject and a non-ASCII character. |
| 1368 // We do not need to do anything since the ASCII pass already handled this. | 1383 // We do not need to do anything since the ASCII pass already handled this. |
| 1369 return false; // Bounds not checked. | 1384 return false; // Bounds not checked. |
| 1370 } | 1385 } |
| 1371 bool checked = false; | 1386 bool checked = false; |
| 1372 // We handle the length > 1 case in a later pass. | 1387 // We handle the length > 1 case in a later pass. |
| 1373 if (length == 1) { | 1388 if (length == 1) { |
| 1374 if (ascii && c > String::kMaxAsciiCharCodeU) { | 1389 if (ascii && c > String::kMaxAsciiCharCodeU) { |
| 1375 // Can't match - see above. | 1390 // Can't match - see above. |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1417 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, | 1432 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, |
| 1418 diff, | 1433 diff, |
| 1419 mask, | 1434 mask, |
| 1420 on_failure); | 1435 on_failure); |
| 1421 return true; | 1436 return true; |
| 1422 } | 1437 } |
| 1423 return false; | 1438 return false; |
| 1424 } | 1439 } |
| 1425 | 1440 |
| 1426 | 1441 |
| 1427 typedef bool EmitCharacterFunction(RegExpCompiler* compiler, | 1442 typedef bool EmitCharacterFunction(Isolate* isolate, |
| 1443 RegExpCompiler* compiler, |
| 1428 uc16 c, | 1444 uc16 c, |
| 1429 Label* on_failure, | 1445 Label* on_failure, |
| 1430 int cp_offset, | 1446 int cp_offset, |
| 1431 bool check, | 1447 bool check, |
| 1432 bool preloaded); | 1448 bool preloaded); |
| 1433 | 1449 |
| 1434 // Only emits letters (things that have case). Only used for case independent | 1450 // Only emits letters (things that have case). Only used for case independent |
| 1435 // matches. | 1451 // matches. |
| 1436 static inline bool EmitAtomLetter(RegExpCompiler* compiler, | 1452 static inline bool EmitAtomLetter(Isolate* isolate, |
| 1453 RegExpCompiler* compiler, |
| 1437 uc16 c, | 1454 uc16 c, |
| 1438 Label* on_failure, | 1455 Label* on_failure, |
| 1439 int cp_offset, | 1456 int cp_offset, |
| 1440 bool check, | 1457 bool check, |
| 1441 bool preloaded) { | 1458 bool preloaded) { |
| 1442 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1459 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 1443 bool ascii = compiler->ascii(); | 1460 bool ascii = compiler->ascii(); |
| 1444 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1461 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1445 int length = GetCaseIndependentLetters(c, ascii, chars); | 1462 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); |
| 1446 if (length <= 1) return false; | 1463 if (length <= 1) return false; |
| 1447 // We may not need to check against the end of the input string | 1464 // We may not need to check against the end of the input string |
| 1448 // if this character lies before a character that matched. | 1465 // if this character lies before a character that matched. |
| 1449 if (!preloaded) { | 1466 if (!preloaded) { |
| 1450 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1467 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
| 1451 } | 1468 } |
| 1452 Label ok; | 1469 Label ok; |
| 1453 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1470 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
| 1454 switch (length) { | 1471 switch (length) { |
| 1455 case 2: { | 1472 case 2: { |
| (...skipping 417 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1873 // | 1890 // |
| 1874 // We iterate along the text object, building up for each character a | 1891 // We iterate along the text object, building up for each character a |
| 1875 // mask and value that can be used to test for a quick failure to match. | 1892 // mask and value that can be used to test for a quick failure to match. |
| 1876 // The masks and values for the positions will be combined into a single | 1893 // The masks and values for the positions will be combined into a single |
| 1877 // machine word for the current character width in order to be used in | 1894 // machine word for the current character width in order to be used in |
| 1878 // generating a quick check. | 1895 // generating a quick check. |
| 1879 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, | 1896 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 1880 RegExpCompiler* compiler, | 1897 RegExpCompiler* compiler, |
| 1881 int characters_filled_in, | 1898 int characters_filled_in, |
| 1882 bool not_at_start) { | 1899 bool not_at_start) { |
| 1900 Isolate* isolate = Isolate::Current(); |
| 1883 ASSERT(characters_filled_in < details->characters()); | 1901 ASSERT(characters_filled_in < details->characters()); |
| 1884 int characters = details->characters(); | 1902 int characters = details->characters(); |
| 1885 int char_mask; | 1903 int char_mask; |
| 1886 int char_shift; | 1904 int char_shift; |
| 1887 if (compiler->ascii()) { | 1905 if (compiler->ascii()) { |
| 1888 char_mask = String::kMaxAsciiCharCode; | 1906 char_mask = String::kMaxAsciiCharCode; |
| 1889 char_shift = 8; | 1907 char_shift = 8; |
| 1890 } else { | 1908 } else { |
| 1891 char_mask = String::kMaxUC16CharCode; | 1909 char_mask = String::kMaxUC16CharCode; |
| 1892 char_shift = 16; | 1910 char_shift = 16; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 1903 // If we expect a non-ASCII character from an ASCII string, | 1921 // If we expect a non-ASCII character from an ASCII string, |
| 1904 // there is no way we can match. Not even case independent | 1922 // there is no way we can match. Not even case independent |
| 1905 // matching can turn an ASCII character into non-ASCII or | 1923 // matching can turn an ASCII character into non-ASCII or |
| 1906 // vice versa. | 1924 // vice versa. |
| 1907 details->set_cannot_match(); | 1925 details->set_cannot_match(); |
| 1908 pos->determines_perfectly = false; | 1926 pos->determines_perfectly = false; |
| 1909 return; | 1927 return; |
| 1910 } | 1928 } |
| 1911 if (compiler->ignore_case()) { | 1929 if (compiler->ignore_case()) { |
| 1912 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1930 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1913 int length = GetCaseIndependentLetters(c, compiler->ascii(), chars); | 1931 int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(), |
| 1932 chars); |
| 1914 ASSERT(length != 0); // Can only happen if c > char_mask (see above). | 1933 ASSERT(length != 0); // Can only happen if c > char_mask (see above). |
| 1915 if (length == 1) { | 1934 if (length == 1) { |
| 1916 // This letter has no case equivalents, so it's nice and simple | 1935 // This letter has no case equivalents, so it's nice and simple |
| 1917 // and the mask-compare will determine definitely whether we have | 1936 // and the mask-compare will determine definitely whether we have |
| 1918 // a match at this character position. | 1937 // a match at this character position. |
| 1919 pos->mask = char_mask; | 1938 pos->mask = char_mask; |
| 1920 pos->value = c; | 1939 pos->value = c; |
| 1921 pos->determines_perfectly = true; | 1940 pos->determines_perfectly = true; |
| 1922 } else { | 1941 } else { |
| 1923 uint32_t common_bits = char_mask; | 1942 uint32_t common_bits = char_mask; |
| (...skipping 479 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2403 // loading characters, which means we do not need to recheck the bounds | 2422 // loading characters, which means we do not need to recheck the bounds |
| 2404 // up to the limit the quick check already checked. In addition the quick | 2423 // up to the limit the quick check already checked. In addition the quick |
| 2405 // check can have involved a mask and compare operation which may simplify | 2424 // check can have involved a mask and compare operation which may simplify |
| 2406 // or obviate the need for further checks at some character positions. | 2425 // or obviate the need for further checks at some character positions. |
| 2407 void TextNode::TextEmitPass(RegExpCompiler* compiler, | 2426 void TextNode::TextEmitPass(RegExpCompiler* compiler, |
| 2408 TextEmitPassType pass, | 2427 TextEmitPassType pass, |
| 2409 bool preloaded, | 2428 bool preloaded, |
| 2410 Trace* trace, | 2429 Trace* trace, |
| 2411 bool first_element_checked, | 2430 bool first_element_checked, |
| 2412 int* checked_up_to) { | 2431 int* checked_up_to) { |
| 2432 Isolate* isolate = Isolate::Current(); |
| 2413 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2433 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 2414 bool ascii = compiler->ascii(); | 2434 bool ascii = compiler->ascii(); |
| 2415 Label* backtrack = trace->backtrack(); | 2435 Label* backtrack = trace->backtrack(); |
| 2416 QuickCheckDetails* quick_check = trace->quick_check_performed(); | 2436 QuickCheckDetails* quick_check = trace->quick_check_performed(); |
| 2417 int element_count = elms_->length(); | 2437 int element_count = elms_->length(); |
| 2418 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { | 2438 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
| 2419 TextElement elm = elms_->at(i); | 2439 TextElement elm = elms_->at(i); |
| 2420 int cp_offset = trace->cp_offset() + elm.cp_offset; | 2440 int cp_offset = trace->cp_offset() + elm.cp_offset; |
| 2421 if (elm.type == TextElement::ATOM) { | 2441 if (elm.type == TextElement::ATOM) { |
| 2422 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2442 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| (...skipping 15 matching lines...) Expand all Loading... |
| 2438 case SIMPLE_CHARACTER_MATCH: | 2458 case SIMPLE_CHARACTER_MATCH: |
| 2439 emit_function = &EmitSimpleCharacter; | 2459 emit_function = &EmitSimpleCharacter; |
| 2440 break; | 2460 break; |
| 2441 case CASE_CHARACTER_MATCH: | 2461 case CASE_CHARACTER_MATCH: |
| 2442 emit_function = &EmitAtomLetter; | 2462 emit_function = &EmitAtomLetter; |
| 2443 break; | 2463 break; |
| 2444 default: | 2464 default: |
| 2445 break; | 2465 break; |
| 2446 } | 2466 } |
| 2447 if (emit_function != NULL) { | 2467 if (emit_function != NULL) { |
| 2448 bool bound_checked = emit_function(compiler, | 2468 bool bound_checked = emit_function(isolate, |
| 2469 compiler, |
| 2449 quarks[j], | 2470 quarks[j], |
| 2450 backtrack, | 2471 backtrack, |
| 2451 cp_offset + j, | 2472 cp_offset + j, |
| 2452 *checked_up_to < cp_offset + j, | 2473 *checked_up_to < cp_offset + j, |
| 2453 preloaded); | 2474 preloaded); |
| 2454 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 2475 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
| 2455 } | 2476 } |
| 2456 } | 2477 } |
| 2457 } else { | 2478 } else { |
| 2458 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 2479 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); |
| (...skipping 1619 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4078 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); | 4099 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); |
| 4079 for (int i = 0; i < overlay.length(); i += 2) { | 4100 for (int i = 0; i < overlay.length(); i += 2) { |
| 4080 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), | 4101 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), |
| 4081 CharacterRangeSplitter::kInOverlay); | 4102 CharacterRangeSplitter::kInOverlay); |
| 4082 } | 4103 } |
| 4083 CharacterRangeSplitter callback(included, excluded); | 4104 CharacterRangeSplitter callback(included, excluded); |
| 4084 table.ForEach(&callback); | 4105 table.ForEach(&callback); |
| 4085 } | 4106 } |
| 4086 | 4107 |
| 4087 | 4108 |
| 4088 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, | 4109 static void AddUncanonicals(Isolate* isolate, |
| 4110 ZoneList<CharacterRange>* ranges, |
| 4089 int bottom, | 4111 int bottom, |
| 4090 int top); | 4112 int top); |
| 4091 | 4113 |
| 4092 | 4114 |
| 4093 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | 4115 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
| 4094 bool is_ascii) { | 4116 bool is_ascii) { |
| 4117 Isolate* isolate = Isolate::Current(); |
| 4095 uc16 bottom = from(); | 4118 uc16 bottom = from(); |
| 4096 uc16 top = to(); | 4119 uc16 top = to(); |
| 4097 if (is_ascii) { | 4120 if (is_ascii) { |
| 4098 if (bottom > String::kMaxAsciiCharCode) return; | 4121 if (bottom > String::kMaxAsciiCharCode) return; |
| 4099 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; | 4122 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; |
| 4100 } | 4123 } |
| 4101 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4124 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 4102 if (top == bottom) { | 4125 if (top == bottom) { |
| 4103 // If this is a singleton we just expand the one character. | 4126 // If this is a singleton we just expand the one character. |
| 4104 int length = uncanonicalize.get(bottom, '\0', chars); | 4127 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); |
| 4105 for (int i = 0; i < length; i++) { | 4128 for (int i = 0; i < length; i++) { |
| 4106 uc32 chr = chars[i]; | 4129 uc32 chr = chars[i]; |
| 4107 if (chr != bottom) { | 4130 if (chr != bottom) { |
| 4108 ranges->Add(CharacterRange::Singleton(chars[i])); | 4131 ranges->Add(CharacterRange::Singleton(chars[i])); |
| 4109 } | 4132 } |
| 4110 } | 4133 } |
| 4111 } else { | 4134 } else { |
| 4112 // If this is a range we expand the characters block by block, | 4135 // If this is a range we expand the characters block by block, |
| 4113 // expanding contiguous subranges (blocks) one at a time. | 4136 // expanding contiguous subranges (blocks) one at a time. |
| 4114 // The approach is as follows. For a given start character we | 4137 // The approach is as follows. For a given start character we |
| 4115 // look up the remainder of the block that contains it (represented | 4138 // look up the remainder of the block that contains it (represented |
| 4116 // by the end point), for instance we find 'z' if the character | 4139 // by the end point), for instance we find 'z' if the character |
| 4117 // is 'c'. A block is characterized by the property | 4140 // is 'c'. A block is characterized by the property |
| 4118 // that all characters uncanonicalize in the same way, except that | 4141 // that all characters uncanonicalize in the same way, except that |
| 4119 // each entry in the result is incremented by the distance from the first | 4142 // each entry in the result is incremented by the distance from the first |
| 4120 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and | 4143 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and |
| 4121 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. | 4144 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. |
| 4122 // Once we've found the end point we look up its uncanonicalization | 4145 // Once we've found the end point we look up its uncanonicalization |
| 4123 // and produce a range for each element. For instance for [c-f] | 4146 // and produce a range for each element. For instance for [c-f] |
| 4124 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only | 4147 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only |
| 4125 // add a range if it is not already contained in the input, so [c-f] | 4148 // add a range if it is not already contained in the input, so [c-f] |
| 4126 // will be skipped but [C-F] will be added. If this range is not | 4149 // will be skipped but [C-F] will be added. If this range is not |
| 4127 // completely contained in a block we do this for all the blocks | 4150 // completely contained in a block we do this for all the blocks |
| 4128 // covered by the range (handling characters that is not in a block | 4151 // covered by the range (handling characters that is not in a block |
| 4129 // as a "singleton block"). | 4152 // as a "singleton block"). |
| 4130 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4153 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 4131 int pos = bottom; | 4154 int pos = bottom; |
| 4132 while (pos < top) { | 4155 while (pos < top) { |
| 4133 int length = canonrange.get(pos, '\0', range); | 4156 int length = isolate->jsregexp_canonrange()->get(pos, '\0', range); |
| 4134 uc16 block_end; | 4157 uc16 block_end; |
| 4135 if (length == 0) { | 4158 if (length == 0) { |
| 4136 block_end = pos; | 4159 block_end = pos; |
| 4137 } else { | 4160 } else { |
| 4138 ASSERT_EQ(1, length); | 4161 ASSERT_EQ(1, length); |
| 4139 block_end = range[0]; | 4162 block_end = range[0]; |
| 4140 } | 4163 } |
| 4141 int end = (block_end > top) ? top : block_end; | 4164 int end = (block_end > top) ? top : block_end; |
| 4142 length = uncanonicalize.get(block_end, '\0', range); | 4165 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range); |
| 4143 for (int i = 0; i < length; i++) { | 4166 for (int i = 0; i < length; i++) { |
| 4144 uc32 c = range[i]; | 4167 uc32 c = range[i]; |
| 4145 uc16 range_from = c - (block_end - pos); | 4168 uc16 range_from = c - (block_end - pos); |
| 4146 uc16 range_to = c - (block_end - end); | 4169 uc16 range_to = c - (block_end - end); |
| 4147 if (!(bottom <= range_from && range_to <= top)) { | 4170 if (!(bottom <= range_from && range_to <= top)) { |
| 4148 ranges->Add(CharacterRange(range_from, range_to)); | 4171 ranges->Add(CharacterRange(range_from, range_to)); |
| 4149 } | 4172 } |
| 4150 } | 4173 } |
| 4151 pos = end + 1; | 4174 pos = end + 1; |
| 4152 } | 4175 } |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4242 result.SetElementsInSecondSet(); | 4265 result.SetElementsInSecondSet(); |
| 4243 } else if (j < range->length()) { | 4266 } else if (j < range->length()) { |
| 4244 // Argument range contains something not in word range. | 4267 // Argument range contains something not in word range. |
| 4245 result.SetElementsInFirstSet(); | 4268 result.SetElementsInFirstSet(); |
| 4246 } | 4269 } |
| 4247 | 4270 |
| 4248 return result; | 4271 return result; |
| 4249 } | 4272 } |
| 4250 | 4273 |
| 4251 | 4274 |
| 4252 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, | 4275 static void AddUncanonicals(Isolate* isolate, |
| 4276 ZoneList<CharacterRange>* ranges, |
| 4253 int bottom, | 4277 int bottom, |
| 4254 int top) { | 4278 int top) { |
| 4255 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4279 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 4256 // Zones with no case mappings. There is a DEBUG-mode loop to assert that | 4280 // Zones with no case mappings. There is a DEBUG-mode loop to assert that |
| 4257 // this table is correct. | 4281 // this table is correct. |
| 4258 // 0x0600 - 0x0fff | 4282 // 0x0600 - 0x0fff |
| 4259 // 0x1100 - 0x1cff | 4283 // 0x1100 - 0x1cff |
| 4260 // 0x2000 - 0x20ff | 4284 // 0x2000 - 0x20ff |
| 4261 // 0x2200 - 0x23ff | 4285 // 0x2200 - 0x23ff |
| 4262 // 0x2500 - 0x2bff | 4286 // 0x2500 - 0x2bff |
| (...skipping 11 matching lines...) Expand all Loading... |
| 4274 if (top <= boundaries[0]) { | 4298 if (top <= boundaries[0]) { |
| 4275 CharacterRange range(bottom, top); | 4299 CharacterRange range(bottom, top); |
| 4276 range.AddCaseEquivalents(ranges, false); | 4300 range.AddCaseEquivalents(ranges, false); |
| 4277 return; | 4301 return; |
| 4278 } | 4302 } |
| 4279 | 4303 |
| 4280 // Split up very large ranges. This helps remove ranges where there are no | 4304 // Split up very large ranges. This helps remove ranges where there are no |
| 4281 // case mappings. | 4305 // case mappings. |
| 4282 for (int i = 0; i < boundary_count; i++) { | 4306 for (int i = 0; i < boundary_count; i++) { |
| 4283 if (bottom < boundaries[i] && top >= boundaries[i]) { | 4307 if (bottom < boundaries[i] && top >= boundaries[i]) { |
| 4284 AddUncanonicals(ranges, bottom, boundaries[i] - 1); | 4308 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1); |
| 4285 AddUncanonicals(ranges, boundaries[i], top); | 4309 AddUncanonicals(isolate, ranges, boundaries[i], top); |
| 4286 return; | 4310 return; |
| 4287 } | 4311 } |
| 4288 } | 4312 } |
| 4289 | 4313 |
| 4290 // If we are completely in a zone with no case mappings then we are done. | 4314 // If we are completely in a zone with no case mappings then we are done. |
| 4291 for (int i = 0; i < boundary_count; i += 2) { | 4315 for (int i = 0; i < boundary_count; i += 2) { |
| 4292 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { | 4316 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { |
| 4293 #ifdef DEBUG | 4317 #ifdef DEBUG |
| 4294 for (int j = bottom; j <= top; j++) { | 4318 for (int j = bottom; j <= top; j++) { |
| 4295 unsigned current_char = j; | 4319 unsigned current_char = j; |
| 4296 int length = uncanonicalize.get(current_char, '\0', chars); | 4320 int length = isolate->jsregexp_uncanonicalize()->get(current_char, |
| 4321 '\0', chars); |
| 4297 for (int k = 0; k < length; k++) { | 4322 for (int k = 0; k < length; k++) { |
| 4298 ASSERT(chars[k] == current_char); | 4323 ASSERT(chars[k] == current_char); |
| 4299 } | 4324 } |
| 4300 } | 4325 } |
| 4301 #endif | 4326 #endif |
| 4302 return; | 4327 return; |
| 4303 } | 4328 } |
| 4304 } | 4329 } |
| 4305 | 4330 |
| 4306 // Step through the range finding equivalent characters. | 4331 // Step through the range finding equivalent characters. |
| 4307 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); | 4332 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); |
| 4308 for (int i = bottom; i <= top; i++) { | 4333 for (int i = bottom; i <= top; i++) { |
| 4309 int length = uncanonicalize.get(i, '\0', chars); | 4334 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars); |
| 4310 for (int j = 0; j < length; j++) { | 4335 for (int j = 0; j < length; j++) { |
| 4311 uc32 chr = chars[j]; | 4336 uc32 chr = chars[j]; |
| 4312 if (chr != i && (chr < bottom || chr > top)) { | 4337 if (chr != i && (chr < bottom || chr > top)) { |
| 4313 characters->Add(chr); | 4338 characters->Add(chr); |
| 4314 } | 4339 } |
| 4315 } | 4340 } |
| 4316 } | 4341 } |
| 4317 | 4342 |
| 4318 // Step through the equivalent characters finding simple ranges and | 4343 // Step through the equivalent characters finding simple ranges and |
| 4319 // adding ranges to the character class. | 4344 // adding ranges to the character class. |
| (...skipping 501 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4821 else | 4846 else |
| 4822 return empty(); | 4847 return empty(); |
| 4823 } | 4848 } |
| 4824 | 4849 |
| 4825 | 4850 |
| 4826 // ------------------------------------------------------------------- | 4851 // ------------------------------------------------------------------- |
| 4827 // Analysis | 4852 // Analysis |
| 4828 | 4853 |
| 4829 | 4854 |
| 4830 void Analysis::EnsureAnalyzed(RegExpNode* that) { | 4855 void Analysis::EnsureAnalyzed(RegExpNode* that) { |
| 4831 StackLimitCheck check; | 4856 StackLimitCheck check(Isolate::Current()); |
| 4832 if (check.HasOverflowed()) { | 4857 if (check.HasOverflowed()) { |
| 4833 fail("Stack overflow"); | 4858 fail("Stack overflow"); |
| 4834 return; | 4859 return; |
| 4835 } | 4860 } |
| 4836 if (that->info()->been_analyzed || that->info()->being_analyzed) | 4861 if (that->info()->been_analyzed || that->info()->being_analyzed) |
| 4837 return; | 4862 return; |
| 4838 that->info()->being_analyzed = true; | 4863 that->info()->being_analyzed = true; |
| 4839 that->Accept(this); | 4864 that->Accept(this); |
| 4840 that->info()->being_analyzed = false; | 4865 that->info()->being_analyzed = false; |
| 4841 that->info()->been_analyzed = true; | 4866 that->info()->been_analyzed = true; |
| (...skipping 489 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5331 macro_assembler.SetCurrentPositionFromEnd(max_length); | 5356 macro_assembler.SetCurrentPositionFromEnd(max_length); |
| 5332 } | 5357 } |
| 5333 | 5358 |
| 5334 return compiler.Assemble(&macro_assembler, | 5359 return compiler.Assemble(&macro_assembler, |
| 5335 node, | 5360 node, |
| 5336 data->capture_count, | 5361 data->capture_count, |
| 5337 pattern); | 5362 pattern); |
| 5338 } | 5363 } |
| 5339 | 5364 |
| 5340 | 5365 |
| 5341 int OffsetsVector::static_offsets_vector_[ | |
| 5342 OffsetsVector::kStaticOffsetsVectorSize]; | |
| 5343 | |
| 5344 }} // namespace v8::internal | 5366 }} // namespace v8::internal |
| OLD | NEW |