Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(392)

Side by Side Diff: src/jsregexp.cc

Issue 1114001: Refactoring of RegExp interface to better support calling several times in a row. (Closed)
Patch Set: Fix type that snuck into the commit. Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
142 // Parse-tree is a single atom that is equal to the pattern. 142 // Parse-tree is a single atom that is equal to the pattern.
143 AtomCompile(re, pattern, flags, pattern); 143 AtomCompile(re, pattern, flags, pattern);
144 } else if (parse_result.tree->IsAtom() && 144 } else if (parse_result.tree->IsAtom() &&
145 !flags.is_ignore_case() && 145 !flags.is_ignore_case() &&
146 parse_result.capture_count == 0) { 146 parse_result.capture_count == 0) {
147 RegExpAtom* atom = parse_result.tree->AsAtom(); 147 RegExpAtom* atom = parse_result.tree->AsAtom();
148 Vector<const uc16> atom_pattern = atom->data(); 148 Vector<const uc16> atom_pattern = atom->data();
149 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); 149 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
150 AtomCompile(re, pattern, flags, atom_string); 150 AtomCompile(re, pattern, flags, atom_string);
151 } else { 151 } else {
152 IrregexpPrepare(re, pattern, flags, parse_result.capture_count); 152 IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
153 } 153 }
154 ASSERT(re->data()->IsFixedArray()); 154 ASSERT(re->data()->IsFixedArray());
155 // Compilation succeeded so the data is set on the regexp 155 // Compilation succeeded so the data is set on the regexp
156 // and we can store it in the cache. 156 // and we can store it in the cache.
157 Handle<FixedArray> data(FixedArray::cast(re->data())); 157 Handle<FixedArray> data(FixedArray::cast(re->data()));
158 CompilationCache::PutRegExp(pattern, flags, data); 158 CompilationCache::PutRegExp(pattern, flags, data);
159 159
160 return re; 160 return re;
161 } 161 }
162 162
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after
334 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { 334 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {
335 return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii))); 335 return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));
336 } 336 }
337 337
338 338
339 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { 339 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
340 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); 340 return Code::cast(re->get(JSRegExp::code_index(is_ascii)));
341 } 341 }
342 342
343 343
344 void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, 344 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
345 Handle<String> pattern, 345 Handle<String> pattern,
Erik Corry 2010/03/19 11:04:11 indent
346 JSRegExp::Flags flags, 346 JSRegExp::Flags flags,
347 int capture_count) { 347 int capture_count) {
348 // Initialize compiled code entries to null. 348 // Initialize compiled code entries to null.
349 Factory::SetRegExpIrregexpData(re, 349 Factory::SetRegExpIrregexpData(re,
350 JSRegExp::IRREGEXP, 350 JSRegExp::IRREGEXP,
351 pattern, 351 pattern,
352 flags, 352 flags,
353 capture_count); 353 capture_count);
354 } 354 }
355 355
356 356
357 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
358 Handle<String> subject) {
359 if (!subject->IsFlat()) {
360 FlattenString(subject);
361 }
362 bool is_ascii = subject->IsAsciiRepresentation();
363 if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
364 return -1;
365 }
366 #ifdef V8_NATIVE_REGEXP
367 // Native regexp only needs room to output captures. Registers are handled
368 // internally.
369 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
370 #else // !V8_NATIVE_REGEXP
371 // Byte-code regexp needs space allocated for all its registers.
372 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
373 #endif // V8_NATIVE_REGEXP
Erik Corry 2010/03/19 11:04:11 V8 -> !V8 Actually I prefer the comment // ndef
Lasse Reichstein 2010/03/19 11:25:42 Changed to ndef.
374 }
375
376
377 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
378 Handle<String> subject,
379 int index,
380 Vector<int> output) {
381 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));
382
383 #ifdef V8_NATIVE_REGEXP
384 ASSERT(output.length() >=
Erik Corry 2010/03/19 11:04:11 please move this assert into the ifdef below
Lasse Reichstein 2010/03/19 11:25:42 Done.
385 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
386 #else
387 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
388 #endif
389 ASSERT(index >= 0);
390 ASSERT(index <= subject->length());
391 ASSERT(subject->IsFlat());
392
393 #ifdef V8_NATIVE_REGEXP
394 do {
395 bool is_ascii = subject->IsAsciiRepresentation();
396 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii));
397 NativeRegExpMacroAssembler::Result res =
398 NativeRegExpMacroAssembler::Match(code,
399 subject,
400 output.start(),
401 output.length(),
402 index);
403 if (res != NativeRegExpMacroAssembler::RETRY) {
404 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||
405 Top::has_pending_exception());
406 STATIC_ASSERT(
407 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
408 STATIC_ASSERT(
409 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
410 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
411 == RE_EXCEPTION);
412 return static_cast<IrregexpResult>(res);
413 }
414 // If result is RETRY, the string has changed representation, and we
Erik Corry 2010/03/19 11:04:11 have -> has
415 // must restart from scratch.
416 // In this case, it means we must make sure we are prepared to handle
417 // the, potentially, different subject (the string can switch between
Erik Corry 2010/03/19 11:04:11 en -> ent
418 // being internal and external, and even between being ASCII and UC16,
419 // but the characters are always the same).
420 IrregexpPrepare(regexp, subject);
421 } while (true);
422 UNREACHABLE();
423 return RE_EXCEPTION;
424 #else // ! V8_NATIVE_REGEXP
425
426 bool is_ascii = subject->IsAsciiRepresentation();
427 // We must have done EnsureCompiledIrregexp, so we can get the number of
428 // registers.
429 int* register_vector = output.start();
430 int number_of_capture_registers =
431 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
432 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
433 register_vector[i] = -1;
434 }
435 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii));
436
437 if (IrregexpInterpreter::Match(byte_codes,
438 subject,
439 register_vector,
440 index)) {
441 return RE_SUCCESS;
442 }
443 return RE_FAILURE;
444 #endif // V8_NATIVE_REGEXP
445 }
446
447
357 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, 448 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
358 Handle<String> subject, 449 Handle<String> subject,
359 int previous_index, 450 int previous_index,
360 Handle<JSArray> last_match_info) { 451 Handle<JSArray> last_match_info) {
361 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); 452 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
362 453
363 // Prepare space for the return values. 454 // Prepare space for the return values.
364 int number_of_capture_registers =
365 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
366
367 #ifndef V8_NATIVE_REGEXP 455 #ifndef V8_NATIVE_REGEXP
368 #ifdef DEBUG 456 #ifdef DEBUG
369 if (FLAG_trace_regexp_bytecodes) { 457 if (FLAG_trace_regexp_bytecodes) {
370 String* pattern = jsregexp->Pattern(); 458 String* pattern = jsregexp->Pattern();
371 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 459 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
372 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 460 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
373 } 461 }
374 #endif 462 #endif
375 #endif 463 #endif
376 464 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);
377 if (!subject->IsFlat()) { 465 if (required_registers < 0) {
378 FlattenString(subject); 466 // Compiling failed with an exception.
379 }
380
381 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
382
383 Handle<FixedArray> array;
384
385 // Dispatch to the correct RegExp implementation.
386 Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
387
388 #ifdef V8_NATIVE_REGEXP
389
390 OffsetsVector captures(number_of_capture_registers);
391 int* captures_vector = captures.vector();
392 NativeRegExpMacroAssembler::Result res;
393 do {
394 bool is_ascii = subject->IsAsciiRepresentation();
395 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
396 return Handle<Object>::null();
397 }
398 Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
399 res = NativeRegExpMacroAssembler::Match(code,
400 subject,
401 captures_vector,
402 captures.length(),
403 previous_index);
404 // If result is RETRY, the string has changed representation, and we
405 // must restart from scratch.
406 } while (res == NativeRegExpMacroAssembler::RETRY);
407 if (res == NativeRegExpMacroAssembler::EXCEPTION) {
408 ASSERT(Top::has_pending_exception()); 467 ASSERT(Top::has_pending_exception());
409 return Handle<Object>::null(); 468 return Handle<Object>::null();
410 } 469 }
411 ASSERT(res == NativeRegExpMacroAssembler::SUCCESS
412 || res == NativeRegExpMacroAssembler::FAILURE);
413 470
414 if (res != NativeRegExpMacroAssembler::SUCCESS) return Factory::null_value(); 471 OffsetsVector registers(required_registers);
415 472
Erik Corry 2010/03/19 11:04:11 2 blank lines
Lasse Reichstein 2010/03/19 11:25:42 Ignored, per offline discussion.
416 array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); 473 IrregexpResult res = IrregexpExecOnce(jsregexp,
417 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); 474 subject,
418 // The captures come in (start, end+1) pairs. 475 previous_index,
419 for (int i = 0; i < number_of_capture_registers; i += 2) { 476 Vector<int>(registers.vector(),
420 // Capture values are relative to start_offset only. 477 registers.length()));
421 // Convert them to be relative to start of string. 478 if (res == RE_SUCCESS) {
422 if (captures_vector[i] >= 0) { 479 int capture_register_count =
423 captures_vector[i] += previous_index; 480 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
481 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
482 AssertNoAllocation no_gc;
483 int* register_vector = registers.vector();
484 FixedArray* array = FixedArray::cast(last_match_info->elements());
485 for (int i = 0; i < capture_register_count; i += 2) {
486 SetCapture(array, i, register_vector[i]);
487 SetCapture(array, i + 1, register_vector[i + 1]);
424 } 488 }
425 if (captures_vector[i + 1] >= 0) { 489 SetLastCaptureCount(array, capture_register_count);
426 captures_vector[i + 1] += previous_index; 490 SetLastSubject(array, *subject);
427 } 491 SetLastInput(array, *subject);
428 SetCapture(*array, i, captures_vector[i]); 492 return last_match_info;
429 SetCapture(*array, i + 1, captures_vector[i + 1]);
430 } 493 }
431 494 if (res == RE_EXCEPTION) {
432 #else // ! V8_NATIVE_REGEXP 495 ASSERT(Top::has_pending_exception());
433
434 bool is_ascii = subject->IsAsciiRepresentation();
435 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
436 return Handle<Object>::null(); 496 return Handle<Object>::null();
437 } 497 }
438 // Now that we have done EnsureCompiledIrregexp we can get the number of 498 ASSERT(res == RE_FAILURE);
439 // registers. 499 return Factory::null_value();
440 int number_of_registers =
441 IrregexpNumberOfRegisters(FixedArray::cast(jsregexp->data()));
442 OffsetsVector registers(number_of_registers);
443 int* register_vector = registers.vector();
444 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
445 register_vector[i] = -1;
446 }
447 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii));
448
449 if (!IrregexpInterpreter::Match(byte_codes,
450 subject,
451 register_vector,
452 previous_index)) {
453 return Factory::null_value();
454 }
455
456 array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements()));
457 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
458 // The captures come in (start, end+1) pairs.
459 for (int i = 0; i < number_of_capture_registers; i += 2) {
460 SetCapture(*array, i, register_vector[i]);
461 SetCapture(*array, i + 1, register_vector[i + 1]);
462 }
463
464 #endif // V8_NATIVE_REGEXP
465
466 SetLastCaptureCount(*array, number_of_capture_registers);
467 SetLastSubject(*array, *subject);
468 SetLastInput(*array, *subject);
469
470 return last_match_info;
471 } 500 }
472 501
473 502
474 // ------------------------------------------------------------------- 503 // -------------------------------------------------------------------
475 // Implementation of the Irregexp regular expression engine. 504 // Implementation of the Irregexp regular expression engine.
476 // 505 //
477 // The Irregexp regular expression engine is intended to be a complete 506 // The Irregexp regular expression engine is intended to be a complete
478 // implementation of ECMAScript regular expressions. It generates either 507 // implementation of ECMAScript regular expressions. It generates either
479 // bytecodes or native code. 508 // bytecodes or native code.
480 509
(...skipping 4744 matching lines...) Expand 10 before | Expand all | Expand 10 after
5225 node, 5254 node,
5226 data->capture_count, 5255 data->capture_count,
5227 pattern); 5256 pattern);
5228 } 5257 }
5229 5258
5230 5259
5231 int OffsetsVector::static_offsets_vector_[ 5260 int OffsetsVector::static_offsets_vector_[
5232 OffsetsVector::kStaticOffsetsVectorSize]; 5261 OffsetsVector::kStaticOffsetsVectorSize];
5233 5262
5234 }} // namespace v8::internal 5263 }} // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698