Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 183 matching lines...) | |
| 194 Handle<String> error_text, | 194 Handle<String> error_text, |
| 195 const char* message) { | 195 const char* message) { |
| 196 Handle<JSArray> array = Factory::NewJSArray(2); | 196 Handle<JSArray> array = Factory::NewJSArray(2); |
| 197 SetElement(array, 0, pattern); | 197 SetElement(array, 0, pattern); |
| 198 SetElement(array, 1, error_text); | 198 SetElement(array, 1, error_text); |
| 199 Handle<Object> regexp_err = Factory::NewSyntaxError(message, array); | 199 Handle<Object> regexp_err = Factory::NewSyntaxError(message, array); |
| 200 Top::Throw(*regexp_err); | 200 Top::Throw(*regexp_err); |
| 201 } | 201 } |
| 202 | 202 |
| 203 | 203 |
| 204 // Generic RegExp methods. Dispatches to implementation specific methods. | |
| 205 | |
| 206 | |
| 207 class OffsetsVector { | |
| 208 public: | |
| 209 inline OffsetsVector(int num_registers) | |
| 210 : offsets_vector_length_(num_registers) { | |
| 211 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | |
| 212 vector_ = NewArray<int>(offsets_vector_length_); | |
| 213 } else { | |
| 214 vector_ = static_offsets_vector_; | |
| 215 } | |
| 216 } | |
| 217 | |
| 218 | |
| 219 inline ~OffsetsVector() { | |
| 220 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | |
| 221 DeleteArray(vector_); | |
| 222 vector_ = NULL; | |
| 223 } | |
| 224 } | |
| 225 | |
| 226 | |
| 227 inline int* vector() { | |
| 228 return vector_; | |
| 229 } | |
| 230 | |
| 231 | |
| 232 inline int length() { | |
| 233 return offsets_vector_length_; | |
| 234 } | |
| 235 | |
| 236 private: | |
| 237 int* vector_; | |
| 238 int offsets_vector_length_; | |
| 239 static const int kStaticOffsetsVectorSize = 50; | |
| 240 static int static_offsets_vector_[kStaticOffsetsVectorSize]; | |
| 241 }; | |
| 242 | |
| 243 | |
| 244 int OffsetsVector::static_offsets_vector_[ | |
| 245 OffsetsVector::kStaticOffsetsVectorSize]; | |
| 246 | |
| 247 | |
| 204 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, | 248 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, |
| 205 Handle<String> pattern, | 249 Handle<String> pattern, |
| 206 Handle<String> flag_str) { | 250 Handle<String> flag_str) { |
| 207 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); | 251 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); |
| 208 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); | 252 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); |
| 209 bool in_cache = !cached.is_null(); | 253 bool in_cache = !cached.is_null(); |
| 210 LOG(RegExpCompileEvent(re, in_cache)); | 254 LOG(RegExpCompileEvent(re, in_cache)); |
| 211 | 255 |
| 212 Handle<Object> result; | 256 Handle<Object> result; |
| 213 if (in_cache) { | 257 if (in_cache) { |
| 214 re->set_data(*cached); | 258 re->set_data(*cached); |
| 215 result = re; | 259 result = re; |
| 216 } else { | 260 } else { |
| 217 FlattenString(pattern); | 261 FlattenString(pattern); |
| 218 ZoneScope zone_scope(DELETE_ON_EXIT); | 262 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 219 RegExpParseResult parse_result; | 263 RegExpParseResult parse_result; |
| 220 FlatStringReader reader(pattern); | 264 FlatStringReader reader(pattern); |
| 221 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { | 265 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { |
| 222 // Throw an exception if we fail to parse the pattern. | 266 // Throw an exception if we fail to parse the pattern. |
| 223 ThrowRegExpException(re, | 267 ThrowRegExpException(re, |
| 224 pattern, | 268 pattern, |
| 225 parse_result.error, | 269 parse_result.error, |
| 226 "malformed_regexp"); | 270 "malformed_regexp"); |
| 227 return Handle<Object>(); | 271 return Handle<Object>::null(); |
| 228 } | 272 } |
| 229 RegExpAtom* atom = parse_result.tree->AsAtom(); | 273 RegExpAtom* atom = parse_result.tree->AsAtom(); |
| 230 if (atom != NULL && !flags.is_ignore_case()) { | 274 if (atom != NULL && !flags.is_ignore_case()) { |
| 231 if (parse_result.has_character_escapes) { | 275 if (parse_result.has_character_escapes) { |
| 232 Vector<const uc16> atom_pattern = atom->data(); | 276 Vector<const uc16> atom_pattern = atom->data(); |
| 233 Handle<String> atom_string = | 277 Handle<String> atom_string = |
| 234 Factory::NewStringFromTwoByte(atom_pattern); | 278 Factory::NewStringFromTwoByte(atom_pattern); |
| 235 result = AtomCompile(re, pattern, flags, atom_string); | 279 result = AtomCompile(re, pattern, flags, atom_string); |
| 236 } else { | 280 } else { |
| 237 result = AtomCompile(re, pattern, flags, pattern); | 281 result = AtomCompile(re, pattern, flags, pattern); |
| 238 } | 282 } |
| 239 } else { | 283 } else { |
| 240 RegExpNode* node = NULL; | 284 if (FLAG_irregexp) { |
| 241 Handle<FixedArray> irregexp_data = | 285 result = IrregexpPrepare(re, pattern, flags); |
| 242 RegExpEngine::Compile(&parse_result, | 286 } else { |
| 243 &node, | |
| 244 flags.is_ignore_case(), | |
| 245 flags.is_multiline(), | |
| 246 pattern); | |
| 247 if (irregexp_data.is_null()) { | |
| 248 if (FLAG_disable_jscre) { | |
| 249 UNIMPLEMENTED(); | |
| 250 } | |
| 251 result = JscrePrepare(re, pattern, flags); | 287 result = JscrePrepare(re, pattern, flags); |
| 252 } else { | |
| 253 result = IrregexpPrepare(re, pattern, flags, irregexp_data); | |
| 254 } | 288 } |
| 255 } | 289 } |
| 256 Object* data = re->data(); | 290 Object* data = re->data(); |
| 257 if (data->IsFixedArray()) { | 291 if (data->IsFixedArray()) { |
| 258 // If compilation succeeded then the data is set on the regexp | 292 // If compilation succeeded then the data is set on the regexp |
| 259 // and we can store it in the cache. | 293 // and we can store it in the cache. |
| 260 Handle<FixedArray> data(FixedArray::cast(re->data())); | 294 Handle<FixedArray> data(FixedArray::cast(re->data())); |
| 261 CompilationCache::PutRegExp(pattern, flags, data); | 295 CompilationCache::PutRegExp(pattern, flags, data); |
| 262 } | 296 } |
| 263 } | 297 } |
| 264 | 298 |
| 265 return result; | 299 return result; |
| 266 } | 300 } |
| 267 | 301 |
| 268 | 302 |
| 269 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, | 303 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
| 270 Handle<String> subject, | 304 Handle<String> subject, |
| 271 Handle<Object> index) { | 305 Handle<Object> index) { |
| 272 switch (regexp->TypeTag()) { | 306 switch (regexp->TypeTag()) { |
| 307 case JSRegExp::ATOM: | |
| 308 return AtomExec(regexp, subject, index); | |
| 309 case JSRegExp::IRREGEXP: { | |
| 310 Handle<Object> result = IrregexpExec(regexp, subject, index); | |
| 311 if (!result.is_null()) { | |
| 312 return result; | |
| 313 } | |
| 314 // We couldn't handle the regexp using Irregexp, so fall back | |
| 315 // on JSCRE. We rejoice at the though of the day when this is | |
| Review comment on the line above (Erik Corry, 2008/12/08 12:47:51): spolling. | |
| 316 // no longer needed. | |
| 317 // Reset the JSRegExp to use JSCRE. | |
| 318 JscrePrepare(regexp, | |
| 319 Handle<String>(regexp->Pattern()), | |
| 320 regexp->GetFlags()); | |
| 321 // Fall-through to JSCRE. | |
| 322 } | |
| 273 case JSRegExp::JSCRE: | 323 case JSRegExp::JSCRE: |
| 274 if (FLAG_disable_jscre) { | 324 if (FLAG_disable_jscre) { |
| 275 UNIMPLEMENTED(); | 325 UNIMPLEMENTED(); |
| 276 } | 326 } |
| 277 return JscreExec(regexp, subject, index); | 327 return JscreExec(regexp, subject, index); |
| 278 case JSRegExp::ATOM: | |
| 279 return AtomExec(regexp, subject, index); | |
| 280 case JSRegExp::IRREGEXP: | |
| 281 return IrregexpExec(regexp, subject, index); | |
| 282 default: | 328 default: |
| 283 UNREACHABLE(); | 329 UNREACHABLE(); |
| 284 return Handle<Object>(); | 330 return Handle<Object>::null(); |
| 285 } | 331 } |
| 286 } | 332 } |
| 287 | 333 |
| 288 | 334 |
| 289 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, | 335 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, |
| 290 Handle<String> subject) { | 336 Handle<String> subject) { |
| 291 switch (regexp->TypeTag()) { | 337 switch (regexp->TypeTag()) { |
| 338 case JSRegExp::ATOM: | |
| 339 return AtomExecGlobal(regexp, subject); | |
| 340 case JSRegExp::IRREGEXP: { | |
| 341 Handle<Object> result = IrregexpExecGlobal(regexp, subject); | |
| 342 if (!result.is_null()) { | |
| 343 return result; | |
| 344 } | |
| 345 // We couldn't handle the regexp using Irregexp, so fall back | |
| 346 // on JSCRE. We rejoice at the though of the day when this is | |
| Review comment on the line above (Erik Corry, 2008/12/08 12:47:51): Speling | |
| 347 // no longer needed. | |
| 348 // Reset the JSRegExp to use JSCRE. | |
| 349 JscrePrepare(regexp, | |
| 350 Handle<String>(regexp->Pattern()), | |
| 351 regexp->GetFlags()); | |
| 352 // Fall-through to JSCRE. | |
| 353 } | |
| 292 case JSRegExp::JSCRE: | 354 case JSRegExp::JSCRE: |
| 293 if (FLAG_disable_jscre) { | 355 if (FLAG_disable_jscre) { |
| 294 UNIMPLEMENTED(); | 356 UNIMPLEMENTED(); |
| 295 } | 357 } |
| 296 return JscreExecGlobal(regexp, subject); | 358 return JscreExecGlobal(regexp, subject); |
| 297 case JSRegExp::ATOM: | |
| 298 return AtomExecGlobal(regexp, subject); | |
| 299 case JSRegExp::IRREGEXP: | |
| 300 return IrregexpExecGlobal(regexp, subject); | |
| 301 default: | 359 default: |
| 302 UNREACHABLE(); | 360 UNREACHABLE(); |
| 303 return Handle<Object>(); | 361 return Handle<Object>::null(); |
| 304 } | 362 } |
| 305 } | 363 } |
| 306 | 364 |
| 307 | 365 |
| 366 // RegExp Atom implementation: Simple string search using indexOf. | |
| 367 | |
| 368 | |
| 308 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, | 369 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, |
| 309 Handle<String> pattern, | 370 Handle<String> pattern, |
| 310 JSRegExp::Flags flags, | 371 JSRegExp::Flags flags, |
| 311 Handle<String> match_pattern) { | 372 Handle<String> match_pattern) { |
| 312 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern); | 373 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern); |
| 313 return re; | 374 return re; |
| 314 } | 375 } |
| 315 | 376 |
| 316 | 377 |
| 317 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, | 378 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
| (...skipping 41 matching lines...) | |
| 359 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); | 420 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); |
| 360 SetElement(result, match_count, pair); | 421 SetElement(result, match_count, pair); |
| 361 match_count++; | 422 match_count++; |
| 362 index = end; | 423 index = end; |
| 363 if (needle_length == 0) index++; | 424 if (needle_length == 0) index++; |
| 364 } | 425 } |
| 365 return result; | 426 return result; |
| 366 } | 427 } |
| 367 | 428 |
| 368 | 429 |
| 430 // JSCRE implementation. | |
| 431 | |
| 432 | |
| 433 int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) { | |
| 434 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); | |
| 435 return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value(); | |
| 436 } | |
| 437 | |
| 438 | |
| 439 ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) { | |
| 440 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); | |
| 441 return ByteArray::cast(value->get(kJscreInternalIndex)); | |
| 442 } | |
| 443 | |
| 444 | |
| 369 Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re, | 445 Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re, |
| 370 Handle<String> pattern, | 446 Handle<String> pattern, |
| 371 JSRegExp::Flags flags) { | 447 JSRegExp::Flags flags) { |
| 372 Handle<Object> value(Heap::undefined_value()); | 448 Handle<Object> value(Heap::undefined_value()); |
| 373 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); | 449 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); |
| 374 return re; | 450 return re; |
| 375 } | 451 } |
| 376 | 452 |
| 377 | 453 |
| 378 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, | 454 static inline Object* JscreDoCompile(String* pattern, |
| 379 Handle<String> pattern, | 455 JSRegExp::Flags flags, |
| 380 JSRegExp::Flags flags, | 456 unsigned* number_of_captures, |
| 381 Handle<FixedArray> irregexp_data) { | 457 const char** error_message, |
| 382 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, irregexp_data); | 458 v8::jscre::JscreRegExp** code) { |
| 383 return re; | |
| 384 } | |
| 385 | |
| 386 | |
| 387 static inline Object* DoCompile(String* pattern, | |
| 388 JSRegExp::Flags flags, | |
| 389 unsigned* number_of_captures, | |
| 390 const char** error_message, | |
| 391 v8::jscre::JscreRegExp** code) { | |
| 392 v8::jscre::JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case() | 459 v8::jscre::JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case() |
| 393 ? v8::jscre::JSRegExpIgnoreCase | 460 ? v8::jscre::JSRegExpIgnoreCase |
| 394 : v8::jscre::JSRegExpDoNotIgnoreCase; | 461 : v8::jscre::JSRegExpDoNotIgnoreCase; |
| 395 v8::jscre::JSRegExpMultilineOption multiline_option = flags.is_multiline() | 462 v8::jscre::JSRegExpMultilineOption multiline_option = flags.is_multiline() |
| 396 ? v8::jscre::JSRegExpMultiline | 463 ? v8::jscre::JSRegExpMultiline |
| 397 : v8::jscre::JSRegExpSingleLine; | 464 : v8::jscre::JSRegExpSingleLine; |
| 398 *error_message = NULL; | 465 *error_message = NULL; |
| 399 malloc_failure = Failure::Exception(); | 466 malloc_failure = Failure::Exception(); |
| 400 *code = v8::jscre::jsRegExpCompile(pattern->GetTwoByteData(), | 467 *code = v8::jscre::jsRegExpCompile(pattern->GetTwoByteData(), |
| 401 pattern->length(), | 468 pattern->length(), |
| 402 case_option, | 469 case_option, |
| 403 multiline_option, | 470 multiline_option, |
| 404 number_of_captures, | 471 number_of_captures, |
| 405 error_message, | 472 error_message, |
| 406 &JSREMalloc, | 473 &JSREMalloc, |
| 407 &JSREFree); | 474 &JSREFree); |
| 408 if (*code == NULL && (malloc_failure->IsRetryAfterGC() || | 475 if (*code == NULL && (malloc_failure->IsRetryAfterGC() || |
| 409 malloc_failure->IsOutOfMemoryFailure())) { | 476 malloc_failure->IsOutOfMemoryFailure())) { |
| 410 return malloc_failure; | 477 return malloc_failure; |
| 411 } else { | 478 } else { |
| 412 // It doesn't matter which object we return here, we just need to return | 479 // It doesn't matter which object we return here, we just need to return |
| 413 // a non-failure to indicate to the GC-retry code that there was no | 480 // a non-failure to indicate to the GC-retry code that there was no |
| 414 // allocation failure. | 481 // allocation failure. |
| 415 return pattern; | 482 return pattern; |
| 416 } | 483 } |
| 417 } | 484 } |
| 418 | 485 |
| 419 | 486 |
| 420 void CompileWithRetryAfterGC(Handle<String> pattern, | 487 static void JscreCompileWithRetryAfterGC(Handle<String> pattern, |
| 421 JSRegExp::Flags flags, | 488 JSRegExp::Flags flags, |
| 422 unsigned* number_of_captures, | 489 unsigned* number_of_captures, |
| 423 const char** error_message, | 490 const char** error_message, |
| 424 v8::jscre::JscreRegExp** code) { | 491 v8::jscre::JscreRegExp** code) { |
| 425 CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern, | 492 CALL_HEAP_FUNCTION_VOID(JscreDoCompile(*pattern, |
| 426 flags, | 493 flags, |
| 427 number_of_captures, | 494 number_of_captures, |
| 428 error_message, | 495 error_message, |
| 429 code)); | 496 code)); |
| 430 } | 497 } |
| 431 | 498 |
| 432 | 499 |
| 433 Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) { | 500 Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) { |
| 434 ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE); | 501 ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE); |
| 435 ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()); | 502 ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()); |
| 436 | 503 |
| 437 Handle<String> pattern(re->Pattern()); | 504 Handle<String> pattern(re->Pattern()); |
| 438 JSRegExp::Flags flags = re->GetFlags(); | 505 JSRegExp::Flags flags = re->GetFlags(); |
| 439 | 506 |
| 440 Handle<String> two_byte_pattern = StringToTwoByte(pattern); | 507 Handle<String> two_byte_pattern = StringToTwoByte(pattern); |
| 441 | 508 |
| 442 unsigned number_of_captures; | 509 unsigned number_of_captures; |
| 443 const char* error_message = NULL; | 510 const char* error_message = NULL; |
| 444 | 511 |
| 445 v8::jscre::JscreRegExp* code = NULL; | 512 v8::jscre::JscreRegExp* code = NULL; |
| 446 FlattenString(pattern); | 513 FlattenString(pattern); |
| 447 | 514 |
| 448 CompileWithRetryAfterGC(two_byte_pattern, | 515 JscreCompileWithRetryAfterGC(two_byte_pattern, |
| 449 flags, | 516 flags, |
| 450 &number_of_captures, | 517 &number_of_captures, |
| 451 &error_message, | 518 &error_message, |
| 452 &code); | 519 &code); |
| 453 | 520 |
| 454 if (code == NULL) { | 521 if (code == NULL) { |
| 455 // Throw an exception. | 522 // Throw an exception. |
| 456 Handle<JSArray> array = Factory::NewJSArray(2); | 523 Handle<JSArray> array = Factory::NewJSArray(2); |
| 457 SetElement(array, 0, pattern); | 524 SetElement(array, 0, pattern); |
| 458 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector( | 525 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector( |
| 459 (error_message == NULL) ? "Unknown regexp error" : error_message))); | 526 (error_message == NULL) ? "Unknown regexp error" : error_message))); |
| 460 Handle<Object> regexp_err = | 527 Handle<Object> regexp_err = |
| 461 Factory::NewSyntaxError("malformed_regexp", array); | 528 Factory::NewSyntaxError("malformed_regexp", array); |
| 462 Top::Throw(*regexp_err); | 529 Top::Throw(*regexp_err); |
| 463 return Handle<Object>(); | 530 return Handle<Object>(); |
| 464 } | 531 } |
| 465 | 532 |
| 466 // Convert the return address to a ByteArray pointer. | 533 // Convert the return address to a ByteArray pointer. |
| 467 Handle<ByteArray> internal( | 534 Handle<ByteArray> internal( |
| 468 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code))); | 535 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code))); |
| 469 | 536 |
| 470 Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength); | 537 Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength); |
| 471 value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures)); | 538 value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures)); |
| 472 value->set(kJscreInternalIndex, *internal); | 539 value->set(kJscreInternalIndex, *internal); |
| 473 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); | 540 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value); |
| 474 | 541 |
| 475 return re; | 542 return re; |
| 476 } | 543 } |
| 477 | 544 |
| 478 | 545 |
| 479 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, | 546 Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp, |
| 480 int num_captures, | 547 Handle<String> subject, |
| 481 Handle<String> two_byte_subject, | 548 Handle<Object> index) { |
| 482 int previous_index, | 549 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE); |
| 483 int* offsets_vector, | 550 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) { |
| 484 int offsets_vector_length) { | 551 Handle<Object> compile_result = JscreCompile(regexp); |
| 485 #ifdef DEBUG | 552 if (compile_result.is_null()) return compile_result; |
| 486 if (FLAG_trace_regexp_bytecodes) { | |
| 487 String* pattern = regexp->Pattern(); | |
| 488 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | |
| 489 PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString())); | |
| 490 } | 553 } |
| 491 #endif | 554 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray()); |
| 492 ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation()); | |
| 493 ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject))); | |
| 494 bool rc; | |
| 495 | 555 |
| 496 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { | 556 int num_captures = JscreNumberOfCaptures(regexp); |
| 497 offsets_vector[i] = -1; | |
| 498 } | |
| 499 | 557 |
| 500 LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject)); | 558 OffsetsVector offsets((num_captures + 1) * 3); |
| 501 | 559 |
| 502 FixedArray* irregexp = | 560 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); |
| 503 FixedArray::cast(regexp->DataAt(JSRegExp::kIrregexpDataIndex)); | |
| 504 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); | |
| 505 | 561 |
| 506 switch (tag) { | 562 Handle<String> subject16 = CachedStringToTwoByte(subject); |
| 507 case RegExpMacroAssembler::kIA32Implementation: { | |
| 508 #ifndef ARM | |
| 509 Code* code = Code::cast(irregexp->get(kIrregexpCodeIndex)); | |
| 510 Address start_addr = | |
| 511 Handle<SeqTwoByteString>::cast(two_byte_subject)->GetCharsAddress(); | |
| 512 int string_offset = | |
| 513 start_addr - reinterpret_cast<Address>(*two_byte_subject); | |
| 514 int start_offset = string_offset + previous_index * sizeof(uc16); | |
| 515 int end_offset = | |
| 516 string_offset + two_byte_subject->length() * sizeof(uc16); | |
| 517 rc = RegExpMacroAssemblerIA32::Execute(code, | |
| 518 two_byte_subject.location(), | |
| 519 start_offset, | |
| 520 end_offset, | |
| 521 offsets_vector, | |
| 522 previous_index == 0); | |
| 523 if (rc) { | |
| 524 // Capture values are relative to start_offset only. | |
| 525 for (int i = 0; i < offsets_vector_length; i++) { | |
| 526 if (offsets_vector[i] >= 0) { | |
| 527 offsets_vector[i] += previous_index; | |
| 528 } | |
| 529 } | |
| 530 } | |
| 531 break; | |
| 532 #else | |
| 533 UNIMPLEMENTED(); | |
| 534 rc = false; | |
| 535 break; | |
| 536 #endif | |
| 537 } | |
| 538 case RegExpMacroAssembler::kBytecodeImplementation: { | |
| 539 Handle<ByteArray> byte_codes = IrregexpCode(regexp); | |
| 540 | 563 |
| 541 rc = IrregexpInterpreter::Match(byte_codes, | 564 return JscreExecOnce(regexp, |
| 542 two_byte_subject, | 565 num_captures, |
| 543 offsets_vector, | 566 subject, |
| 544 previous_index); | 567 previous_index, |
| 545 break; | 568 subject16->GetTwoByteData(), |
| 546 } | 569 offsets.vector(), |
| 547 case RegExpMacroAssembler::kARMImplementation: | 570 offsets.length()); |
| 548 default: | |
| 549 UNREACHABLE(); | |
| 550 rc = false; | |
| 551 break; | |
| 552 } | |
| 553 | |
| 554 if (!rc) { | |
| 555 return Factory::null_value(); | |
| 556 } | |
| 557 | |
| 558 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); | |
| 559 // The captures come in (start, end+1) pairs. | |
| 560 for (int i = 0; i < 2 * (num_captures+1); i += 2) { | |
| 561 array->set(i, Smi::FromInt(offsets_vector[i])); | |
| 562 array->set(i+1, Smi::FromInt(offsets_vector[i+1])); | |
| 563 } | |
| 564 return Factory::NewJSArrayWithElements(array); | |
| 565 } | 571 } |
| 566 | 572 |
| 567 | 573 |
| 568 Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp, | 574 Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp, |
| 569 int num_captures, | 575 int num_captures, |
| 570 Handle<String> subject, | 576 Handle<String> subject, |
| 571 int previous_index, | 577 int previous_index, |
| 572 const uc16* two_byte_subject, | 578 const uc16* two_byte_subject, |
| 573 int* offsets_vector, | 579 int* offsets_vector, |
| 574 int offsets_vector_length) { | 580 int offsets_vector_length) { |
| (...skipping 35 matching lines...) | |
| 610 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); | 616 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); |
| 611 // The captures come in (start, end+1) pairs. | 617 // The captures come in (start, end+1) pairs. |
| 612 for (int i = 0; i < 2 * (num_captures+1); i += 2) { | 618 for (int i = 0; i < 2 * (num_captures+1); i += 2) { |
| 613 array->set(i, Smi::FromInt(offsets_vector[i])); | 619 array->set(i, Smi::FromInt(offsets_vector[i])); |
| 614 array->set(i+1, Smi::FromInt(offsets_vector[i+1])); | 620 array->set(i+1, Smi::FromInt(offsets_vector[i+1])); |
| 615 } | 621 } |
| 616 return Factory::NewJSArrayWithElements(array); | 622 return Factory::NewJSArrayWithElements(array); |
| 617 } | 623 } |
| 618 | 624 |
| 619 | 625 |
| 620 class OffsetsVector { | 626 Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp, |
| 621 public: | 627 Handle<String> subject) { |
| 622 inline OffsetsVector(int num_registers) | |
| 623 : offsets_vector_length_(num_registers) { | |
| 624 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | |
| 625 vector_ = NewArray<int>(offsets_vector_length_); | |
| 626 } else { | |
| 627 vector_ = static_offsets_vector_; | |
| 628 } | |
| 629 } | |
| 630 | |
| 631 | |
| 632 inline ~OffsetsVector() { | |
| 633 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | |
| 634 DeleteArray(vector_); | |
| 635 vector_ = NULL; | |
| 636 } | |
| 637 } | |
| 638 | |
| 639 | |
| 640 inline int* vector() { | |
| 641 return vector_; | |
| 642 } | |
| 643 | |
| 644 | |
| 645 inline int length() { | |
| 646 return offsets_vector_length_; | |
| 647 } | |
| 648 | |
| 649 private: | |
| 650 int* vector_; | |
| 651 int offsets_vector_length_; | |
| 652 static const int kStaticOffsetsVectorSize = 50; | |
| 653 static int static_offsets_vector_[kStaticOffsetsVectorSize]; | |
| 654 }; | |
| 655 | |
| 656 | |
| 657 int OffsetsVector::static_offsets_vector_[ | |
| 658 OffsetsVector::kStaticOffsetsVectorSize]; | |
| 659 | |
| 660 | |
| 661 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, | |
| 662 Handle<String> subject, | |
| 663 Handle<Object> index) { | |
| 664 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | |
| 665 ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined()); | |
| 666 | |
| 667 // Prepare space for the return values. | |
| 668 int number_of_registers = IrregexpNumberOfRegisters(regexp); | |
| 669 OffsetsVector offsets(number_of_registers); | |
| 670 | |
| 671 int num_captures = IrregexpNumberOfCaptures(regexp); | |
| 672 | |
| 673 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); | |
| 674 | |
| 675 Handle<String> subject16 = CachedStringToTwoByte(subject); | |
| 676 | |
| 677 Handle<Object> result(IrregexpExecOnce(regexp, | |
| 678 num_captures, | |
| 679 subject16, | |
| 680 previous_index, | |
| 681 offsets.vector(), | |
| 682 offsets.length())); | |
| 683 return result; | |
| 684 } | |
| 685 | |
| 686 | |
| 687 Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp, | |
| 688 Handle<String> subject, | |
| 689 Handle<Object> index) { | |
| 690 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE); | 628 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE); |
| 691 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) { | 629 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) { |
| 692 Handle<Object> compile_result = JscreCompile(regexp); | 630 Handle<Object> compile_result = JscreCompile(regexp); |
| 693 if (compile_result.is_null()) return compile_result; | 631 if (compile_result.is_null()) return compile_result; |
| 694 } | 632 } |
| 695 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray()); | 633 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray()); |
| 696 | 634 |
| 635 // Prepare space for the return values. | |
| 697 int num_captures = JscreNumberOfCaptures(regexp); | 636 int num_captures = JscreNumberOfCaptures(regexp); |
| 698 | 637 |
| 699 OffsetsVector offsets((num_captures + 1) * 3); | 638 OffsetsVector offsets((num_captures + 1) * 3); |
| 700 | 639 |
| 701 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); | |
| 702 | |
| 703 Handle<String> subject16 = CachedStringToTwoByte(subject); | |
| 704 | |
| 705 Handle<Object> result(JscreExecOnce(regexp, | |
| 706 num_captures, | |
| 707 subject, | |
| 708 previous_index, | |
| 709 subject16->GetTwoByteData(), | |
| 710 offsets.vector(), | |
| 711 offsets.length())); | |
| 712 | |
| 713 return result; | |
| 714 } | |
| 715 | |
| 716 | |
| 717 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, | |
| 718 Handle<String> subject) { | |
| 719 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | |
| 720 ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined()); | |
| 721 | |
| 722 // Prepare space for the return values. | |
| 723 int number_of_registers = IrregexpNumberOfRegisters(regexp); | |
| 724 OffsetsVector offsets(number_of_registers); | |
| 725 | |
| 726 int previous_index = 0; | 640 int previous_index = 0; |
| 727 | 641 |
| 728 Handle<JSArray> result = Factory::NewJSArray(0); | 642 Handle<JSArray> result = Factory::NewJSArray(0); |
| 729 int i = 0; | 643 int i = 0; |
| 730 Handle<Object> matches; | 644 Handle<Object> matches; |
| 731 | 645 |
| 732 Handle<String> subject16 = CachedStringToTwoByte(subject); | 646 Handle<String> subject16 = CachedStringToTwoByte(subject); |
| 733 | 647 |
| 734 do { | 648 do { |
| 735 if (previous_index > subject->length() || previous_index < 0) { | 649 if (previous_index > subject->length() || previous_index < 0) { |
| 736 // Per ECMA-262 15.10.6.2, if the previous index is greater than the | 650 // Per ECMA-262 15.10.6.2, if the previous index is greater than the |
| 737 // string length, there is no match. | 651 // string length, there is no match. |
| 738 matches = Factory::null_value(); | 652 matches = Factory::null_value(); |
| 739 } else { | 653 } else { |
| 740 matches = IrregexpExecOnce(regexp, | 654 matches = JscreExecOnce(regexp, |
| 741 IrregexpNumberOfCaptures(regexp), | 655 num_captures, |
| 742 subject16, | 656 subject, |
| 743 previous_index, | 657 previous_index, |
| 744 offsets.vector(), | 658 subject16->GetTwoByteData(), |
| 745 offsets.length()); | 659 offsets.vector(), |
| 660 offsets.length()); | |
| 746 | 661 |
| 747 if (matches->IsJSArray()) { | 662 if (matches->IsJSArray()) { |
| 748 SetElement(result, i, matches); | 663 SetElement(result, i, matches); |
| 749 i++; | 664 i++; |
| 750 previous_index = offsets.vector()[1]; | 665 previous_index = offsets.vector()[1]; |
| 751 if (offsets.vector()[0] == offsets.vector()[1]) { | 666 if (offsets.vector()[0] == offsets.vector()[1]) { |
| 752 previous_index++; | 667 previous_index++; |
| 753 } | 668 } |
| 754 } | 669 } |
| 755 } | 670 } |
| 756 } while (matches->IsJSArray()); | 671 } while (matches->IsJSArray()); |
| 757 | 672 |
| 758 // If we exited the loop with an exception, throw it. | 673 // If we exited the loop with an exception, throw it. |
| 759 if (matches->IsNull()) { | 674 if (matches->IsNull()) { |
| 760 // Exited loop normally. | 675 // Exited loop normally. |
| 761 return result; | 676 return result; |
| 762 } else { | 677 } else { |
| 763 // Exited loop with the exception in matches. | 678 // Exited loop with the exception in matches. |
| 764 return matches; | 679 return matches; |
| 765 } | 680 } |
| 766 } | 681 } |
| 767 | 682 |
| 768 | 683 |
| 769 Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp, | 684 // Irregexp implementation. |
| 770 Handle<String> subject) { | 685 |
| 771 ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE); | 686 |
| 772 if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) { | 687 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re, |
| 773 Handle<Object> compile_result = JscreCompile(regexp); | 688 bool is_ascii) { |
| 774 if (compile_result.is_null()) return compile_result; | 689 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); |
| 690 Handle<FixedArray> alternatives( | |
| 691 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); | |
| 692 ASSERT_EQ(2, alternatives->length()); | |
| 693 | |
| 694 int index = is_ascii ? 0 : 1; | |
| 695 Object* entry = alternatives->get(index); | |
| 696 if (!entry->IsNull()) { | |
| 697 return Handle<FixedArray>(FixedArray::cast(entry)); | |
| 775 } | 698 } |
| 776 ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray()); | 699 |
| 700 // Compile the RegExp. | |
| 701 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 702 | |
| 703 JSRegExp::Flags flags = re->GetFlags(); | |
| 704 | |
| 705 Handle<String> pattern(re->Pattern()); | |
| 706 StringShape shape(*pattern); | |
| 707 if (!pattern->IsFlat(shape)) { | |
| 708 pattern->Flatten(shape); | |
| 709 } | |
| 710 | |
| 711 RegExpParseResult parse_result; | |
| 712 FlatStringReader reader(pattern); | |
| 713 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { | |
| 714 // Throw an exception if we fail to parse the pattern. | |
| 715 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. | |
| 716 ThrowRegExpException(re, | |
| 717 pattern, | |
| 718 parse_result.error, | |
| 719 "malformed_regexp"); | |
| 720 return Handle<FixedArray>::null(); | |
| 721 } | |
| 722 Handle<FixedArray> compiled_entry = | |
| 723 RegExpEngine::Compile(&parse_result, | |
| 724 NULL, | |
| 725 flags.is_ignore_case(), | |
| 726 flags.is_multiline(), | |
| 727 pattern, | |
| 728 is_ascii); | |
| 729 if (!compiled_entry.is_null()) { | |
| 730 alternatives->set(index, *compiled_entry); | |
| 731 } | |
| 732 return compiled_entry; | |
| 733 } | |
| 734 | |
| 735 | |
| 736 int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) { | |
| 737 return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value(); | |
| 738 } | |
| 739 | |
| 740 | |
| 741 int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) { | |
| 742 return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value(); | |
| 743 } | |
| 744 | |
| 745 | |
| 746 Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) { | |
| 747 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() | |
| 748 == RegExpMacroAssembler::kBytecodeImplementation); | |
| 749 return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex))); | |
| 750 } | |
| 751 | |
| 752 | |
| 753 Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) { | |
| 754 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() | |
| 755 != RegExpMacroAssembler::kBytecodeImplementation); | |
| 756 return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex))); | |
| 757 } | |
| 758 | |
| 759 | |
| 760 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, | |
| 761 Handle<String> pattern, | |
| 762 JSRegExp::Flags flags) { | |
| 763 // Make space for ASCII and UC16 versions. | |
| 764 Handle<FixedArray> alternatives = Factory::NewFixedArray(2); | |
| 765 alternatives->set_null(0); | |
| 766 alternatives->set_null(1); | |
| 767 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives); | |
| 768 return re; | |
| 769 } | |
| 770 | |
| 771 | |
| 772 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, | |
| 773 Handle<String> subject, | |
| 774 Handle<Object> index) { | |
| 775 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | |
| 776 ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); | |
| 777 | |
| 778 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); | |
| 779 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); | |
| 780 if (irregexp.is_null()) { | |
| 781 // We can't handle the RegExp with IRRegExp. | |
| 782 return Handle<Object>::null(); | |
| 783 } | |
| 777 | 784 |
| 778 // Prepare space for the return values. | 785 // Prepare space for the return values. |
| 779 int num_captures = JscreNumberOfCaptures(regexp); | 786 int number_of_registers = IrregexpNumberOfRegisters(irregexp); |
| 787 OffsetsVector offsets(number_of_registers); | |
| 780 | 788 |
| 781 OffsetsVector offsets((num_captures + 1) * 3); | 789 int num_captures = IrregexpNumberOfCaptures(irregexp); |
| 790 | |
| 791 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); | |
| 792 | |
| 793 #ifdef DEBUG | |
| 794 if (FLAG_trace_regexp_bytecodes) { | |
| 795 String* pattern = regexp->Pattern(); | |
| 796 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | |
| 797 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | |
| 798 } | |
| 799 #endif | |
| 800 LOG(RegExpExecEvent(regexp, previous_index, subject)); | |
| 801 return IrregexpExecOnce(irregexp, | |
| 802 num_captures, | |
| 803 subject, | |
| 804 previous_index, | |
| 805 offsets.vector(), | |
| 806 offsets.length()); | |
| 807 } | |
| 808 | |
| 809 | |
| 810 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, | |
| 811 Handle<String> subject) { | |
| 812 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | |
| 813 | |
| 814 StringShape shape(*subject); | |
| 815 bool is_ascii = shape.IsAsciiRepresentation(); | |
| 816 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); | |
| 817 if (irregexp.is_null()) { | |
| 818 return Handle<Object>::null(); | |
| 819 } | |
| 820 | |
| 821 // Prepare space for the return values. | |
| 822 int number_of_registers = IrregexpNumberOfRegisters(irregexp); | |
| 823 OffsetsVector offsets(number_of_registers); | |
| 782 | 824 |
| 783 int previous_index = 0; | 825 int previous_index = 0; |
| 784 | 826 |
| 785 Handle<JSArray> result = Factory::NewJSArray(0); | 827 Handle<JSArray> result = Factory::NewJSArray(0); |
| 786 int i = 0; | 828 int i = 0; |
| 787 Handle<Object> matches; | 829 Handle<Object> matches; |
| 788 | 830 |
| 789 Handle<String> subject16 = CachedStringToTwoByte(subject); | 831 if (!subject->IsFlat(shape)) { |
| 832 subject->Flatten(shape); | |
| 833 } | |
| 790 | 834 |
| 791 do { | 835 do { |
| 792 if (previous_index > subject->length() || previous_index < 0) { | 836 if (previous_index > subject->length() || previous_index < 0) { |
| 793 // Per ECMA-262 15.10.6.2, if the previous index is greater than the | 837 // Per ECMA-262 15.10.6.2, if the previous index is greater than the |
| 794 // string length, there is no match. | 838 // string length, there is no match. |
| 795 matches = Factory::null_value(); | 839 matches = Factory::null_value(); |
| 796 } else { | 840 } else { |
| 797 matches = JscreExecOnce(regexp, | 841 #ifdef DEBUG |
| 798 num_captures, | 842 if (FLAG_trace_regexp_bytecodes) { |
| 799 subject, | 843 String* pattern = regexp->Pattern(); |
| 800 previous_index, | 844 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| 801 subject16->GetTwoByteData(), | 845 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| 802 offsets.vector(), | 846 } |
| 803 offsets.length()); | 847 #endif |
| 848 LOG(RegExpExecEvent(regexp, previous_index, subject)); | |
| 849 matches = IrregexpExecOnce(irregexp, | |
| 850 IrregexpNumberOfCaptures(irregexp), | |
| 851 subject, | |
| 852 previous_index, | |
| 853 offsets.vector(), | |
| 854 offsets.length()); | |
| 804 | 855 |
| 805 if (matches->IsJSArray()) { | 856 if (matches->IsJSArray()) { |
| 806 SetElement(result, i, matches); | 857 SetElement(result, i, matches); |
| 807 i++; | 858 i++; |
| 808 previous_index = offsets.vector()[1]; | 859 previous_index = offsets.vector()[1]; |
| 809 if (offsets.vector()[0] == offsets.vector()[1]) { | 860 if (offsets.vector()[0] == offsets.vector()[1]) { |
| 810 previous_index++; | 861 previous_index++; |
| 811 } | 862 } |
| 812 } | 863 } |
| 813 } | 864 } |
| 814 } while (matches->IsJSArray()); | 865 } while (matches->IsJSArray()); |
| 815 | 866 |
| 816 // If we exited the loop with an exception, throw it. | 867 // If we exited the loop with an exception, throw it. |
| 817 if (matches->IsNull()) { | 868 if (matches->IsNull()) { |
| 818 // Exited loop normally. | 869 // Exited loop normally. |
| 819 return result; | 870 return result; |
| 820 } else { | 871 } else { |
| 821 // Exited loop with the exception in matches. | 872 // Exited loop with the exception in matches. |
| 822 return matches; | 873 return matches; |
| 823 } | 874 } |
| 824 } | 875 } |
| 825 | 876 |
| 826 | 877 |
| 827 int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) { | 878 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, |
| 828 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); | 879 int num_captures, |
| 829 return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value(); | 880 Handle<String> subject, |
| 881 int previous_index, | |
| 882 int* offsets_vector, | |
| 883 int offsets_vector_length) { | |
| 884 bool rc; | |
| 885 | |
| 886 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); | |
| 887 | |
| 888 switch (tag) { | |
| 889 case RegExpMacroAssembler::kIA32Implementation: { | |
| 890 #ifndef ARM | |
| 891 if (!subject->IsFlat(StringShape(*subject))) { | |
| 892 FlattenString(subject); | |
| 893 } | |
| 894 Handle<Code> code = IrregexpNativeCode(irregexp); | |
| 895 | |
| 896 StringShape shape(*subject); | |
| 897 | |
| 898 // Character offsets into string. | |
| 899 int start_offset = previous_index; | |
| 900 int end_offset = subject->length(shape); | |
| 901 | |
| 902 if (shape.IsCons()) { | |
| 903 subject = Handle<String>(ConsString::cast(*subject)->first()); | |
| 904 } else if (shape.IsSliced()) { | |
| 905 SlicedString* slice = SlicedString::cast(*subject); | |
| 906 start_offset += slice->start(); | |
| 907 end_offset += slice->start(); | |
| 908 subject = Handle<String>(slice->buffer()); | |
| 909 } | |
| 910 | |
| 911 // String is now either Sequential or External | |
| 912 StringShape flatshape(*subject); | |
| 913 bool is_ascii = flatshape.IsAsciiRepresentation(); | |
| 914 int char_size = is_ascii ? sizeof(char) : sizeof(uc16); // NOLINT | |
|
Erik Corry
2008/12/08 12:47:52
It's part of the definition of the language that s
| |
| 915 | |
| 916 if (flatshape.IsExternal()) { | |
| 917 const byte* address; | |
| 918 if (is_ascii) { | |
| 919 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); | |
| 920 address = reinterpret_cast<const byte*>(ext->resource()->data()); | |
| 921 } else { | |
| 922 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); | |
| 923 address = reinterpret_cast<const byte*>(ext->resource()->data()); | |
| 924 } | |
| 925 rc = RegExpMacroAssemblerIA32::Execute( | |
| 926 *code, | |
| 927 &address, | |
| 928 start_offset * char_size, | |
| 929 end_offset * char_size, | |
| 930 offsets_vector, | |
| 931 previous_index == 0); | |
| 932 } else { // Sequential string | |
| 933 int byte_offset = | |
| 934 is_ascii ? SeqAsciiString::kHeaderSize - kHeapObjectTag: | |
| 935 SeqTwoByteString::kHeaderSize - kHeapObjectTag; | |
|
Erik Corry
2008/12/08 12:47:52
SeqAsciiString and SeqTwoByteString have methods f
| |
| 936 rc = RegExpMacroAssemblerIA32::Execute( | |
| 937 *code, | |
| 938 subject.location(), | |
| 939 byte_offset + start_offset * char_size, | |
| 940 byte_offset + end_offset * char_size, | |
| 941 offsets_vector, | |
| 942 previous_index == 0); | |
| 943 } | |
| 944 | |
| 945 if (rc) { | |
| 946 // Capture values are relative to start_offset only. | |
| 947 for (int i = 0; i < offsets_vector_length; i++) { | |
| 948 if (offsets_vector[i] >= 0) { | |
| 949 offsets_vector[i] += previous_index; | |
| 950 } | |
| 951 } | |
| 952 } | |
| 953 break; | |
| 954 #else | |
| 955 UNIMPLEMENTED(); | |
| 956 rc = false; | |
| 957 break; | |
| 958 #endif | |
| 959 } | |
| 960 case RegExpMacroAssembler::kBytecodeImplementation: { | |
| 961 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { | |
| 962 offsets_vector[i] = -1; | |
| 963 } | |
| 964 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp); | |
| 965 | |
| 966 Handle<String> two_byte_subject = CachedStringToTwoByte(subject); | |
| 967 | |
| 968 rc = IrregexpInterpreter::Match(byte_codes, | |
| 969 two_byte_subject, | |
| 970 offsets_vector, | |
| 971 previous_index); | |
| 972 break; | |
| 973 } | |
| 974 case RegExpMacroAssembler::kARMImplementation: | |
| 975 default: | |
| 976 UNREACHABLE(); | |
| 977 rc = false; | |
| 978 break; | |
| 979 } | |
| 980 | |
| 981 if (!rc) { | |
| 982 return Factory::null_value(); | |
| 983 } | |
| 984 | |
| 985 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); | |
| 986 // The captures come in (start, end+1) pairs. | |
| 987 for (int i = 0; i < 2 * (num_captures+1); i += 2) { | |
| 988 array->set(i, Smi::FromInt(offsets_vector[i])); | |
| 989 array->set(i+1, Smi::FromInt(offsets_vector[i+1])); | |
| 990 } | |
| 991 return Factory::NewJSArrayWithElements(array); | |
| 830 } | 992 } |
| 831 | 993 |
| 832 | 994 |
| 833 ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) { | |
| 834 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); | |
| 835 return ByteArray::cast(value->get(kJscreInternalIndex)); | |
| 836 } | |
| 837 | |
| 838 | |
| 839 int RegExpImpl::IrregexpNumberOfCaptures(Handle<JSRegExp> re) { | |
| 840 FixedArray* value = | |
| 841 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)); | |
| 842 return Smi::cast(value->get(kIrregexpNumberOfCapturesIndex))->value(); | |
| 843 } | |
| 844 | |
| 845 | |
| 846 int RegExpImpl::IrregexpNumberOfRegisters(Handle<JSRegExp> re) { | |
| 847 FixedArray* value = | |
| 848 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)); | |
| 849 return Smi::cast(value->get(kIrregexpNumberOfRegistersIndex))->value(); | |
| 850 } | |
| 851 | |
| 852 | |
| 853 Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) { | |
| 854 FixedArray* value = | |
| 855 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)); | |
| 856 return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex))); | |
| 857 } | |
| 858 | |
| 859 | |
| 860 // ------------------------------------------------------------------- | 995 // ------------------------------------------------------------------- |
| 861 // Implementation of the Irregexp regular expression engine. | 996 // Implementation of the Irregexp regular expression engine. |
| 862 // | 997 // |
| 863 // The Irregexp regular expression engine is intended to be a complete | 998 // The Irregexp regular expression engine is intended to be a complete |
| 864 // implementation of ECMAScript regular expressions. It generates either | 999 // implementation of ECMAScript regular expressions. It generates either |
| 865 // bytecodes or native code. | 1000 // bytecodes or native code. |
| 866 | 1001 |
| 867 // The Irregexp regexp engine is structured in three steps. | 1002 // The Irregexp regexp engine is structured in three steps. |
| 868 // 1) The parser generates an abstract syntax tree. See ast.cc. | 1003 // 1) The parser generates an abstract syntax tree. See ast.cc. |
| 869 // 2) From the AST a node network is created. The nodes are all | 1004 // 2) From the AST a node network is created. The nodes are all |
| (...skipping 2598 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3468 void DispatchTableConstructor::VisitAction(ActionNode* that) { | 3603 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
| 3469 RegExpNode* target = that->on_success(); | 3604 RegExpNode* target = that->on_success(); |
| 3470 target->Accept(this); | 3605 target->Accept(this); |
| 3471 } | 3606 } |
| 3472 | 3607 |
| 3473 | 3608 |
| 3474 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, | 3609 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, |
| 3475 RegExpNode** node_return, | 3610 RegExpNode** node_return, |
| 3476 bool ignore_case, | 3611 bool ignore_case, |
| 3477 bool is_multiline, | 3612 bool is_multiline, |
| 3478 Handle<String> pattern) { | 3613 Handle<String> pattern, |
| 3614 bool is_ascii) { | |
| 3479 RegExpCompiler compiler(input->capture_count, ignore_case); | 3615 RegExpCompiler compiler(input->capture_count, ignore_case); |
| 3480 // Wrap the body of the regexp in capture #0. | 3616 // Wrap the body of the regexp in capture #0. |
| 3481 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, | 3617 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, |
| 3482 0, | 3618 0, |
| 3483 &compiler, | 3619 &compiler, |
| 3484 compiler.accept()); | 3620 compiler.accept()); |
| 3485 // Add a .*? at the beginning, outside the body capture. | 3621 // Add a .*? at the beginning, outside the body capture. |
| 3486 // Note: We could choose to not add this if the regexp is anchored at | 3622 // Note: We could choose to not add this if the regexp is anchored at |
| 3487 // the start of the input but I'm not sure how best to do that and | 3623 // the start of the input but I'm not sure how best to do that and |
| 3488 // since we don't even handle ^ yet I'm saving that optimization for | 3624 // since we don't even handle ^ yet I'm saving that optimization for |
| 3489 // later. | 3625 // later. |
| 3490 RegExpNode* node = RegExpQuantifier::ToNode(0, | 3626 RegExpNode* node = RegExpQuantifier::ToNode(0, |
| 3491 RegExpQuantifier::kInfinity, | 3627 RegExpQuantifier::kInfinity, |
| 3492 false, | 3628 false, |
| 3493 new RegExpCharacterClass('*'), | 3629 new RegExpCharacterClass('*'), |
| 3494 &compiler, | 3630 &compiler, |
| 3495 captured_body); | 3631 captured_body); |
| 3496 if (node_return != NULL) *node_return = node; | 3632 if (node_return != NULL) *node_return = node; |
| 3497 Analysis analysis(ignore_case); | 3633 Analysis analysis(ignore_case); |
| 3498 analysis.EnsureAnalyzed(node); | 3634 analysis.EnsureAnalyzed(node); |
| 3499 | 3635 |
| 3500 NodeInfo info = *node->info(); | 3636 NodeInfo info = *node->info(); |
| 3501 node = node->EnsureExpanded(&info); | 3637 node = node->EnsureExpanded(&info); |
| 3502 | 3638 |
| 3503 if (!FLAG_irregexp) { | |
| 3504 return Handle<FixedArray>::null(); | |
| 3505 } | |
| 3506 | |
| 3507 if (is_multiline && !FLAG_attempt_multiline_irregexp) { | 3639 if (is_multiline && !FLAG_attempt_multiline_irregexp) { |
| 3508 return Handle<FixedArray>::null(); | 3640 return Handle<FixedArray>::null(); |
| 3509 } | 3641 } |
| 3510 | 3642 |
| 3511 if (FLAG_irregexp_native) { | 3643 if (FLAG_irregexp_native) { |
| 3512 #ifdef ARM | 3644 #ifdef ARM |
| 3513 // Unimplemented, fall-through to bytecode implementation. | 3645 // Unimplemented, fall-through to bytecode implementation. |
| 3514 #else // IA32 | 3646 #else // IA32 |
| 3515 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, | 3647 RegExpMacroAssemblerIA32::Mode mode; |
| 3648 if (is_ascii) { | |
| 3649 mode = RegExpMacroAssemblerIA32::ASCII; | |
| 3650 } else { | |
| 3651 mode = RegExpMacroAssemblerIA32::UC16; | |
| 3652 } | |
| 3653 RegExpMacroAssemblerIA32 macro_assembler(mode, | |
| 3516 (input->capture_count + 1) * 2); | 3654 (input->capture_count + 1) * 2); |
| 3517 return compiler.Assemble(&macro_assembler, | 3655 return compiler.Assemble(&macro_assembler, |
| 3518 node, | 3656 node, |
| 3519 input->capture_count, | 3657 input->capture_count, |
| 3520 pattern); | 3658 pattern); |
| 3521 #endif | 3659 #endif |
| 3522 } | 3660 } |
| 3523 EmbeddedVector<byte, 1024> codes; | 3661 EmbeddedVector<byte, 1024> codes; |
| 3524 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 3662 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
| 3525 return compiler.Assemble(&macro_assembler, | 3663 return compiler.Assemble(&macro_assembler, |
| 3526 node, | 3664 node, |
| 3527 input->capture_count, | 3665 input->capture_count, |
| 3528 pattern); | 3666 pattern); |
| 3529 } | 3667 } |
| 3530 | 3668 |
| 3531 | 3669 |
| 3532 }} // namespace v8::internal | 3670 }} // namespace v8::internal |
| OLD | NEW |