OLD | NEW |
---|---|
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
142 // Parse-tree is a single atom that is equal to the pattern. | 142 // Parse-tree is a single atom that is equal to the pattern. |
143 AtomCompile(re, pattern, flags, pattern); | 143 AtomCompile(re, pattern, flags, pattern); |
144 } else if (parse_result.tree->IsAtom() && | 144 } else if (parse_result.tree->IsAtom() && |
145 !flags.is_ignore_case() && | 145 !flags.is_ignore_case() && |
146 parse_result.capture_count == 0) { | 146 parse_result.capture_count == 0) { |
147 RegExpAtom* atom = parse_result.tree->AsAtom(); | 147 RegExpAtom* atom = parse_result.tree->AsAtom(); |
148 Vector<const uc16> atom_pattern = atom->data(); | 148 Vector<const uc16> atom_pattern = atom->data(); |
149 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); | 149 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); |
150 AtomCompile(re, pattern, flags, atom_string); | 150 AtomCompile(re, pattern, flags, atom_string); |
151 } else { | 151 } else { |
152 IrregexpPrepare(re, pattern, flags, parse_result.capture_count); | 152 IrregexpInitialize(re, pattern, flags, parse_result.capture_count); |
153 } | 153 } |
154 ASSERT(re->data()->IsFixedArray()); | 154 ASSERT(re->data()->IsFixedArray()); |
155 // Compilation succeeded so the data is set on the regexp | 155 // Compilation succeeded so the data is set on the regexp |
156 // and we can store it in the cache. | 156 // and we can store it in the cache. |
157 Handle<FixedArray> data(FixedArray::cast(re->data())); | 157 Handle<FixedArray> data(FixedArray::cast(re->data())); |
158 CompilationCache::PutRegExp(pattern, flags, data); | 158 CompilationCache::PutRegExp(pattern, flags, data); |
159 | 159 |
160 return re; | 160 return re; |
161 } | 161 } |
162 | 162 |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
334 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { | 334 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { |
335 return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii))); | 335 return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii))); |
336 } | 336 } |
337 | 337 |
338 | 338 |
339 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { | 339 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { |
340 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); | 340 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); |
341 } | 341 } |
342 | 342 |
343 | 343 |
344 void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, | 344 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, |
345 Handle<String> pattern, | 345 Handle<String> pattern, |
Erik Corry
2010/03/19 11:04:11
indent
| |
346 JSRegExp::Flags flags, | 346 JSRegExp::Flags flags, |
347 int capture_count) { | 347 int capture_count) { |
348 // Initialize compiled code entries to null. | 348 // Initialize compiled code entries to null. |
349 Factory::SetRegExpIrregexpData(re, | 349 Factory::SetRegExpIrregexpData(re, |
350 JSRegExp::IRREGEXP, | 350 JSRegExp::IRREGEXP, |
351 pattern, | 351 pattern, |
352 flags, | 352 flags, |
353 capture_count); | 353 capture_count); |
354 } | 354 } |
355 | 355 |
356 | 356 |
357 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, | |
358 Handle<String> subject) { | |
359 if (!subject->IsFlat()) { | |
360 FlattenString(subject); | |
361 } | |
362 bool is_ascii = subject->IsAsciiRepresentation(); | |
363 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { | |
364 return -1; | |
365 } | |
366 #ifdef V8_NATIVE_REGEXP | |
367 // Native regexp only needs room to output captures. Registers are handled | |
368 // internally. | |
369 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | |
370 #else // !V8_NATIVE_REGEXP | |
371 // Byte-code regexp needs space allocated for all its registers. | |
372 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); | |
373 #endif // V8_NATIVE_REGEXP | |
Erik Corry
2010/03/19 11:04:11
V8 -> !V8
Actually I prefer the comment // ndef
Lasse Reichstein
2010/03/19 11:25:42
Changed to ndef.
| |
374 } | |
375 | |
376 | |
377 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, | |
378 Handle<String> subject, | |
379 int index, | |
380 Vector<int> output) { | |
381 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data())); | |
382 | |
383 #ifdef V8_NATIVE_REGEXP | |
384 ASSERT(output.length() >= | |
Erik Corry
2010/03/19 11:04:11
please move this assert into the ifdef below
Lasse Reichstein
2010/03/19 11:25:42
Done.
| |
385 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); | |
386 #else | |
387 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); | |
388 #endif | |
389 ASSERT(index >= 0); | |
390 ASSERT(index <= subject->length()); | |
391 ASSERT(subject->IsFlat()); | |
392 | |
393 #ifdef V8_NATIVE_REGEXP | |
394 do { | |
395 bool is_ascii = subject->IsAsciiRepresentation(); | |
396 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii)); | |
397 NativeRegExpMacroAssembler::Result res = | |
398 NativeRegExpMacroAssembler::Match(code, | |
399 subject, | |
400 output.start(), | |
401 output.length(), | |
402 index); | |
403 if (res != NativeRegExpMacroAssembler::RETRY) { | |
404 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || | |
405 Top::has_pending_exception()); | |
406 STATIC_ASSERT( | |
407 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); | |
408 STATIC_ASSERT( | |
409 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); | |
410 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) | |
411 == RE_EXCEPTION); | |
412 return static_cast<IrregexpResult>(res); | |
413 } | |
414 // If result is RETRY, the string has changed representation, and we | |
Erik Corry
2010/03/19 11:04:11
have -> has
| |
415 // must restart from scratch. | |
416 // In this case, it means we must make sure we are prepared to handle | |
417 // the, potentially, different subject (the string can switch between | |
Erik Corry
2010/03/19 11:04:11
en -> ent
| |
418 // being internal and external, and even between being ASCII and UC16, | |
419 // but the characters are always the same). | |
420 IrregexpPrepare(regexp, subject); | |
421 } while (true); | |
422 UNREACHABLE(); | |
423 return RE_EXCEPTION; | |
424 #else // ! V8_NATIVE_REGEXP | |
425 | |
426 bool is_ascii = subject->IsAsciiRepresentation(); | |
427 // We must have done EnsureCompiledIrregexp, so we can get the number of | |
428 // registers. | |
429 int* register_vector = output.start(); | |
430 int number_of_capture_registers = | |
431 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; | |
432 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | |
433 register_vector[i] = -1; | |
434 } | |
435 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii)); | |
436 | |
437 if (IrregexpInterpreter::Match(byte_codes, | |
438 subject, | |
439 register_vector, | |
440 index)) { | |
441 return RE_SUCCESS; | |
442 } | |
443 return RE_FAILURE; | |
444 #endif // V8_NATIVE_REGEXP | |
445 } | |
446 | |
447 | |
357 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, | 448 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, |
358 Handle<String> subject, | 449 Handle<String> subject, |
359 int previous_index, | 450 int previous_index, |
360 Handle<JSArray> last_match_info) { | 451 Handle<JSArray> last_match_info) { |
361 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); | 452 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); |
362 | 453 |
363 // Prepare space for the return values. | 454 // Prepare space for the return values. |
364 int number_of_capture_registers = | |
365 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; | |
366 | |
367 #ifndef V8_NATIVE_REGEXP | 455 #ifndef V8_NATIVE_REGEXP |
368 #ifdef DEBUG | 456 #ifdef DEBUG |
369 if (FLAG_trace_regexp_bytecodes) { | 457 if (FLAG_trace_regexp_bytecodes) { |
370 String* pattern = jsregexp->Pattern(); | 458 String* pattern = jsregexp->Pattern(); |
371 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 459 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
372 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 460 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
373 } | 461 } |
374 #endif | 462 #endif |
375 #endif | 463 #endif |
376 | 464 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); |
377 if (!subject->IsFlat()) { | 465 if (required_registers < 0) { |
378 FlattenString(subject); | 466 // Compiling failed with an exception. |
379 } | |
380 | |
381 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); | |
382 | |
383 Handle<FixedArray> array; | |
384 | |
385 // Dispatch to the correct RegExp implementation. | |
386 Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data())); | |
387 | |
388 #ifdef V8_NATIVE_REGEXP | |
389 | |
390 OffsetsVector captures(number_of_capture_registers); | |
391 int* captures_vector = captures.vector(); | |
392 NativeRegExpMacroAssembler::Result res; | |
393 do { | |
394 bool is_ascii = subject->IsAsciiRepresentation(); | |
395 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { | |
396 return Handle<Object>::null(); | |
397 } | |
398 Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii)); | |
399 res = NativeRegExpMacroAssembler::Match(code, | |
400 subject, | |
401 captures_vector, | |
402 captures.length(), | |
403 previous_index); | |
404 // If result is RETRY, the string have changed representation, and we | |
405 // must restart from scratch. | |
406 } while (res == NativeRegExpMacroAssembler::RETRY); | |
407 if (res == NativeRegExpMacroAssembler::EXCEPTION) { | |
408 ASSERT(Top::has_pending_exception()); | 467 ASSERT(Top::has_pending_exception()); |
409 return Handle<Object>::null(); | 468 return Handle<Object>::null(); |
410 } | 469 } |
411 ASSERT(res == NativeRegExpMacroAssembler::SUCCESS | |
412 || res == NativeRegExpMacroAssembler::FAILURE); | |
413 | 470 |
414 if (res != NativeRegExpMacroAssembler::SUCCESS) return Factory::null_value(); | 471 OffsetsVector registers(required_registers); |
415 | 472 |
Erik Corry
2010/03/19 11:04:11
2 blank lines
Lasse Reichstein
2010/03/19 11:25:42
Ignored, per offline discussion.
| |
416 array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); | 473 IrregexpResult res = IrregexpExecOnce(jsregexp, |
417 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); | 474 subject, |
418 // The captures come in (start, end+1) pairs. | 475 previous_index, |
419 for (int i = 0; i < number_of_capture_registers; i += 2) { | 476 Vector<int>(registers.vector(), |
420 // Capture values are relative to start_offset only. | 477 registers.length())); |
421 // Convert them to be relative to start of string. | 478 if (res == RE_SUCCESS) { |
422 if (captures_vector[i] >= 0) { | 479 int capture_register_count = |
423 captures_vector[i] += previous_index; | 480 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; |
481 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); | |
482 AssertNoAllocation no_gc; | |
483 int* register_vector = registers.vector(); | |
484 FixedArray* array = FixedArray::cast(last_match_info->elements()); | |
485 for (int i = 0; i < capture_register_count; i += 2) { | |
486 SetCapture(array, i, register_vector[i]); | |
487 SetCapture(array, i + 1, register_vector[i + 1]); | |
424 } | 488 } |
425 if (captures_vector[i + 1] >= 0) { | 489 SetLastCaptureCount(array, capture_register_count); |
426 captures_vector[i + 1] += previous_index; | 490 SetLastSubject(array, *subject); |
427 } | 491 SetLastInput(array, *subject); |
428 SetCapture(*array, i, captures_vector[i]); | 492 return last_match_info; |
429 SetCapture(*array, i + 1, captures_vector[i + 1]); | |
430 } | 493 } |
431 | 494 if (res == RE_EXCEPTION) { |
432 #else // ! V8_NATIVE_REGEXP | 495 ASSERT(Top::has_pending_exception()); |
433 | |
434 bool is_ascii = subject->IsAsciiRepresentation(); | |
435 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) { | |
436 return Handle<Object>::null(); | 496 return Handle<Object>::null(); |
437 } | 497 } |
438 // Now that we have done EnsureCompiledIrregexp we can get the number of | 498 ASSERT(res == RE_FAILURE); |
439 // registers. | 499 return Factory::null_value(); |
440 int number_of_registers = | |
441 IrregexpNumberOfRegisters(FixedArray::cast(jsregexp->data())); | |
442 OffsetsVector registers(number_of_registers); | |
443 int* register_vector = registers.vector(); | |
444 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | |
445 register_vector[i] = -1; | |
446 } | |
447 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); | |
448 | |
449 if (!IrregexpInterpreter::Match(byte_codes, | |
450 subject, | |
451 register_vector, | |
452 previous_index)) { | |
453 return Factory::null_value(); | |
454 } | |
455 | |
456 array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); | |
457 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); | |
458 // The captures come in (start, end+1) pairs. | |
459 for (int i = 0; i < number_of_capture_registers; i += 2) { | |
460 SetCapture(*array, i, register_vector[i]); | |
461 SetCapture(*array, i + 1, register_vector[i + 1]); | |
462 } | |
463 | |
464 #endif // V8_NATIVE_REGEXP | |
465 | |
466 SetLastCaptureCount(*array, number_of_capture_registers); | |
467 SetLastSubject(*array, *subject); | |
468 SetLastInput(*array, *subject); | |
469 | |
470 return last_match_info; | |
471 } | 500 } |
472 | 501 |
473 | 502 |
474 // ------------------------------------------------------------------- | 503 // ------------------------------------------------------------------- |
475 // Implementation of the Irregexp regular expression engine. | 504 // Implementation of the Irregexp regular expression engine. |
476 // | 505 // |
477 // The Irregexp regular expression engine is intended to be a complete | 506 // The Irregexp regular expression engine is intended to be a complete |
478 // implementation of ECMAScript regular expressions. It generates either | 507 // implementation of ECMAScript regular expressions. It generates either |
479 // bytecodes or native code. | 508 // bytecodes or native code. |
480 | 509 |
(...skipping 4744 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5225 node, | 5254 node, |
5226 data->capture_count, | 5255 data->capture_count, |
5227 pattern); | 5256 pattern); |
5228 } | 5257 } |
5229 | 5258 |
5230 | 5259 |
5231 int OffsetsVector::static_offsets_vector_[ | 5260 int OffsetsVector::static_offsets_vector_[ |
5232 OffsetsVector::kStaticOffsetsVectorSize]; | 5261 OffsetsVector::kStaticOffsetsVectorSize]; |
5233 | 5262 |
5234 }} // namespace v8::internal | 5263 }} // namespace v8::internal |
OLD | NEW |