Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 131 matching lines...) | |
| 142 // Parse-tree is a single atom that is equal to the pattern. | 142 // Parse-tree is a single atom that is equal to the pattern. |
| 143 AtomCompile(re, pattern, flags, pattern); | 143 AtomCompile(re, pattern, flags, pattern); |
| 144 } else if (parse_result.tree->IsAtom() && | 144 } else if (parse_result.tree->IsAtom() && |
| 145 !flags.is_ignore_case() && | 145 !flags.is_ignore_case() && |
| 146 parse_result.capture_count == 0) { | 146 parse_result.capture_count == 0) { |
| 147 RegExpAtom* atom = parse_result.tree->AsAtom(); | 147 RegExpAtom* atom = parse_result.tree->AsAtom(); |
| 148 Vector<const uc16> atom_pattern = atom->data(); | 148 Vector<const uc16> atom_pattern = atom->data(); |
| 149 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); | 149 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); |
| 150 AtomCompile(re, pattern, flags, atom_string); | 150 AtomCompile(re, pattern, flags, atom_string); |
| 151 } else { | 151 } else { |
| 152 IrregexpPrepare(re, pattern, flags, parse_result.capture_count); | 152 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); |
| 153 } | 153 } |
| 154 ASSERT(re->data()->IsFixedArray()); | 154 ASSERT(re->data()->IsFixedArray()); |
| 155 // Compilation succeeded so the data is set on the regexp | 155 // Compilation succeeded so the data is set on the regexp |
| 156 // and we can store it in the cache. | 156 // and we can store it in the cache. |
| 157 Handle<FixedArray> data(FixedArray::cast(re->data())); | 157 Handle<FixedArray> data(FixedArray::cast(re->data())); |
| 158 CompilationCache::PutRegExp(pattern, flags, data); | 158 CompilationCache::PutRegExp(pattern, flags, data); |
| 159 | 159 |
| 160 return re; | 160 return re; |
| 161 } | 161 } |
| 162 | 162 |
| (...skipping 171 matching lines...) | |
| 334 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { | 334 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { |
| 335 return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii))); | 335 return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii))); |
| 336 } | 336 } |
| 337 | 337 |
| 338 | 338 |
| 339 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { | 339 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { |
| 340 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); | 340 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); |
| 341 } | 341 } |
| 342 | 342 |
| 343 | 343 |
| 344 void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, | 344 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, |
| 345 Handle<String> pattern, | 345 Handle<String> pattern, |
|
Erik Corry
2010/03/19 11:04:11
indent
| |
| 346 JSRegExp::Flags flags, | 346 JSRegExp::Flags flags, |
| 347 int capture_count) { | 347 int capture_count) { |
| 348 // Initialize compiled code entries to null. | 348 // Initialize compiled code entries to null. |
| 349 Factory::SetRegExpIrregexpData(re, | 349 Factory::SetRegExpIrregexpData(re, |
| 350 JSRegExp::IRREGEXP, | 350 JSRegExp::IRREGEXP, |
| 351 pattern, | 351 pattern, |
| 352 flags, | 352 flags, |
| 353 capture_count); | 353 capture_count); |
| 354 } | 354 } |
| 355 | 355 |
| 356 | 356 |
| 357 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, | |
| 358 Handle<String> subject) { | |
| 359 if (!subject->IsFlat()) { | |
| 360 FlattenString(subject); | |
| 361 } | |
| 362 bool is_ascii = subject->IsAsciiRepresentation(); | |
| 363 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { | |
| 364 return -1; | |
| 365 } | |
| 366 #ifdef V8_NATIVE_REGEXP | |
| 367 // Native regexp only needs room to output captures. Registers are handled | |
| 368 // internally. | |
| 369 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | |
| 370 #else // !V8_NATIVE_REGEXP | |
| 371 // Byte-code regexp needs space allocated for all its registers. | |
| 372 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); | |
| 373 #endif // V8_NATIVE_REGEXP | |
|
Erik Corry
2010/03/19 11:04:11
V8 -> !V8
Actually I prefer the comment // ndef
Lasse Reichstein
2010/03/19 11:25:42
Changed to ndef.
| |
| 374 } | |
| 375 | |
| 376 | |
| 377 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, | |
| 378 Handle<String> subject, | |
| 379 int index, | |
| 380 Vector<int> output) { | |
| 381 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); | |
| 382 | |
| 383 #ifdef V8_NATIVE_REGEXP | |
| 384 ASSERT(output.length() >= | |
|
Erik Corry
2010/03/19 11:04:11
please move this assert into the ifdef below
Lasse Reichstein
2010/03/19 11:25:42
Done.
| |
| 385 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); | |
| 386 #else | |
| 387 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); | |
| 388 #endif | |
| 389 ASSERT(index >= 0); | |
| 390 ASSERT(index <= subject->length()); | |
| 391 ASSERT(subject->IsFlat()); | |
| 392 | |
| 393 #ifdef V8_NATIVE_REGEXP | |
| 394 do { | |
| 395 bool is_ascii = subject->IsAsciiRepresentation(); | |
| 396 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii)); | |
| 397 NativeRegExpMacroAssembler::Result res = | |
| 398 NativeRegExpMacroAssembler::Match(code, | |
| 399 subject, | |
| 400 output.start(), | |
| 401 output.length(), | |
| 402 index); | |
| 403 if (res != NativeRegExpMacroAssembler::RETRY) { | |
| 404 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || | |
| 405 Top::has_pending_exception()); | |
| 406 STATIC_ASSERT( | |
| 407 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); | |
| 408 STATIC_ASSERT( | |
| 409 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); | |
| 410 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) | |
| 411 == RE_EXCEPTION); | |
| 412 return static_cast<IrregexpResult>(res); | |
| 413 } | |
| 414 // If result is RETRY, the string have changed representation, and we | |
|
Erik Corry
2010/03/19 11:04:11
have -> has
| |
| 415 // must restart from scratch. | |
| 416 // In this case, it means we must make sure we are prepared to handle | |
| 417 // the, potentially, differen subject (the string can switch between | |
|
Erik Corry
2010/03/19 11:04:11
en -> ent
| |
| 418 // being internal and external, and even between being ASCII and UC16, | |
| 419 // but the characters are always the same). | |
| 420 IrregexpPrepare(regexp, subject); | |
| 421 } while (true); | |
| 422 UNREACHABLE(); | |
| 423 return RE_EXCEPTION; | |
| 424 #else // ! V8_NATIVE_REGEXP | |
| 425 | |
| 426 bool is_ascii = subject->IsAsciiRepresentation(); | |
| 427 // We must have done EnsureCompiledIrregexp, so we can get the number of | |
| 428 // registers. | |
| 429 int* register_vector = output.start(); | |
| 430 int number_of_capture_registers = | |
| 431 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; | |
| 432 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | |
| 433 register_vector[i] = -1; | |
| 434 } | |
| 435 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii)); | |
| 436 | |
| 437 if (IrregexpInterpreter::Match(byte_codes, | |
| 438 subject, | |
| 439 register_vector, | |
| 440 index)) { | |
| 441 return RE_SUCCESS; | |
| 442 } | |
| 443 return RE_FAILURE; | |
| 444 #endif // V8_NATIVE_REGEXP | |
| 445 } | |
| 446 | |
| 447 | |
| 357 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, | 448 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
| 358 Handle<String> subject, | 449 Handle<String> subject, |
| 359 int previous_index, | 450 int previous_index, |
| 360 Handle<JSArray> last_match_info) { | 451 Handle<JSArray> last_match_info) { |
| 361 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); | 452 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); |
| 362 | 453 |
| 363 // Prepare space for the return values. | 454 // Prepare space for the return values. |
| 364 int number_of_capture_registers = | |
| 365 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; | |
| 366 | |
| 367 #ifndef V8_NATIVE_REGEXP | 455 #ifndef V8_NATIVE_REGEXP |
| 368 #ifdef DEBUG | 456 #ifdef DEBUG |
| 369 if (FLAG_trace_regexp_bytecodes) { | 457 if (FLAG_trace_regexp_bytecodes) { |
| 370 String* pattern = jsregexp->Pattern(); | 458 String* pattern = jsregexp->Pattern(); |
| 371 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 459 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
| 372 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 460 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
| 373 } | 461 } |
| 374 #endif | 462 #endif |
| 375 #endif | 463 #endif |
| 376 | 464 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); |
| 377 if (!subject->IsFlat()) { | 465 if (required_registers < 0) { |
| 378 FlattenString(subject); | 466 // Compiling failed with an exception. |
| 379 } | |
| 380 | |
| 381 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); | |
| 382 | |
| 383 Handle<FixedArray> array; | |
| 384 | |
| 385 // Dispatch to the correct RegExp implementation. | |
| 386 Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data())); | |
| 387 | |
| 388 #ifdef V8_NATIVE_REGEXP | |
| 389 | |
| 390 OffsetsVector captures(number_of_capture_registers); | |
| 391 int* captures_vector = captures.vector(); | |
| 392 NativeRegExpMacroAssembler::Result res; | |
| 393 do { | |
| 394 bool is_ascii = subject->IsAsciiRepresentation(); | |
| 395 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { | |
| 396 return Handle<Object>::null(); | |
| 397 } | |
| 398 Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii)); | |
| 399 res = NativeRegExpMacroAssembler::Match(code, | |
| 400 subject, | |
| 401 captures_vector, | |
| 402 captures.length(), | |
| 403 previous_index); | |
| 404 // If result is RETRY, the string have changed representation, and we | |
| 405 // must restart from scratch. | |
| 406 } while (res == NativeRegExpMacroAssembler::RETRY); | |
| 407 if (res == NativeRegExpMacroAssembler::EXCEPTION) { | |
| 408 ASSERT(Top::has_pending_exception()); | 467 ASSERT(Top::has_pending_exception()); |
| 409 return Handle<Object>::null(); | 468 return Handle<Object>::null(); |
| 410 } | 469 } |
| 411 ASSERT(res == NativeRegExpMacroAssembler::SUCCESS | |
| 412 || res == NativeRegExpMacroAssembler::FAILURE); | |
| 413 | 470 |
| 414 if (res != NativeRegExpMacroAssembler::SUCCESS) return Factory::null_value(); | 471 OffsetsVector registers(required_registers); |
| 415 | 472 |
|
Erik Corry
2010/03/19 11:04:11
2 blank lines
Lasse Reichstein
2010/03/19 11:25:42
Ignored, per offline discussion.
| |
| 416 array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); | 473 IrregexpResult res = IrregexpExecOnce(jsregexp, |
| 417 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); | 474 subject, |
| 418 // The captures come in (start, end+1) pairs. | 475 previous_index, |
| 419 for (int i = 0; i < number_of_capture_registers; i += 2) { | 476 Vector<int>(registers.vector(), |
| 420 // Capture values are relative to start_offset only. | 477 registers.length())); |
| 421 // Convert them to be relative to start of string. | 478 if (res == RE_SUCCESS) { |
| 422 if (captures_vector[i] >= 0) { | 479 int capture_register_count = |
| 423 captures_vector[i] += previous_index; | 480 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
| 481 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); | |
| 482 AssertNoAllocation no_gc; | |
| 483 int* register_vector = registers.vector(); | |
| 484 FixedArray* array = FixedArray::cast(last_match_info->elements()); | |
| 485 for (int i = 0; i < capture_register_count; i += 2) { | |
| 486 SetCapture(array, i, register_vector[i]); | |
| 487 SetCapture(array, i + 1, register_vector[i + 1]); | |
| 424 } | 488 } |
| 425 if (captures_vector[i + 1] >= 0) { | 489 SetLastCaptureCount(array, capture_register_count); |
| 426 captures_vector[i + 1] += previous_index; | 490 SetLastSubject(array, *subject); |
| 427 } | 491 SetLastInput(array, *subject); |
| 428 SetCapture(*array, i, captures_vector[i]); | 492 return last_match_info; |
| 429 SetCapture(*array, i + 1, captures_vector[i + 1]); | |
| 430 } | 493 } |
| 431 | 494 if (res == RE_EXCEPTION) { |
| 432 #else // ! V8_NATIVE_REGEXP | 495 ASSERT(Top::has_pending_exception()); |
| 433 | |
| 434 bool is_ascii = subject->IsAsciiRepresentation(); | |
| 435 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { | |
| 436 return Handle<Object>::null(); | 496 return Handle<Object>::null(); |
| 437 } | 497 } |
| 438 // Now that we have done EnsureCompiledIrregexp we can get the number of | 498 ASSERT(res == RE_FAILURE); |
| 439 // registers. | 499 return Factory::null_value(); |
| 440 int number_of_registers = | |
| 441 IrregexpNumberOfRegisters(FixedArray::cast(jsregexp->data())); | |
| 442 OffsetsVector registers(number_of_registers); | |
| 443 int* register_vector = registers.vector(); | |
| 444 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | |
| 445 register_vector[i] = -1; | |
| 446 } | |
| 447 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); | |
| 448 | |
| 449 if (!IrregexpInterpreter::Match(byte_codes, | |
| 450 subject, | |
| 451 register_vector, | |
| 452 previous_index)) { | |
| 453 return Factory::null_value(); | |
| 454 } | |
| 455 | |
| 456 array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); | |
| 457 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); | |
| 458 // The captures come in (start, end+1) pairs. | |
| 459 for (int i = 0; i < number_of_capture_registers; i += 2) { | |
| 460 SetCapture(*array, i, register_vector[i]); | |
| 461 SetCapture(*array, i + 1, register_vector[i + 1]); | |
| 462 } | |
| 463 | |
| 464 #endif // V8_NATIVE_REGEXP | |
| 465 | |
| 466 SetLastCaptureCount(*array, number_of_capture_registers); | |
| 467 SetLastSubject(*array, *subject); | |
| 468 SetLastInput(*array, *subject); | |
| 469 | |
| 470 return last_match_info; | |
| 471 } | 500 } |
| 472 | 501 |
| 473 | 502 |
| 474 // ------------------------------------------------------------------- | 503 // ------------------------------------------------------------------- |
| 475 // Implementation of the Irregexp regular expression engine. | 504 // Implementation of the Irregexp regular expression engine. |
| 476 // | 505 // |
| 477 // The Irregexp regular expression engine is intended to be a complete | 506 // The Irregexp regular expression engine is intended to be a complete |
| 478 // implementation of ECMAScript regular expressions. It generates either | 507 // implementation of ECMAScript regular expressions. It generates either |
| 479 // bytecodes or native code. | 508 // bytecodes or native code. |
| 480 | 509 |
| (...skipping 4744 matching lines...) | |
| 5225 node, | 5254 node, |
| 5226 data->capture_count, | 5255 data->capture_count, |
| 5227 pattern); | 5256 pattern); |
| 5228 } | 5257 } |
| 5229 | 5258 |
| 5230 | 5259 |
| 5231 int OffsetsVector::static_offsets_vector_[ | 5260 int OffsetsVector::static_offsets_vector_[ |
| 5232 OffsetsVector::kStaticOffsetsVectorSize]; | 5261 OffsetsVector::kStaticOffsetsVectorSize]; |
| 5233 | 5262 |
| 5234 }} // namespace v8::internal | 5263 }} // namespace v8::internal |
| OLD | NEW |