Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1188)

Side by Side Diff: src/jsregexp.cc

Issue 1148007: Merge bleeding_edge from version 2.1.3 up to revision 4205... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/partial_snapshots/
Patch Set: Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
137 // Parse-tree is a single atom that is equal to the pattern. 137 // Parse-tree is a single atom that is equal to the pattern.
138 AtomCompile(re, pattern, flags, pattern); 138 AtomCompile(re, pattern, flags, pattern);
139 } else if (parse_result.tree->IsAtom() && 139 } else if (parse_result.tree->IsAtom() &&
140 !flags.is_ignore_case() && 140 !flags.is_ignore_case() &&
141 parse_result.capture_count == 0) { 141 parse_result.capture_count == 0) {
142 RegExpAtom* atom = parse_result.tree->AsAtom(); 142 RegExpAtom* atom = parse_result.tree->AsAtom();
143 Vector<const uc16> atom_pattern = atom->data(); 143 Vector<const uc16> atom_pattern = atom->data();
144 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); 144 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
145 AtomCompile(re, pattern, flags, atom_string); 145 AtomCompile(re, pattern, flags, atom_string);
146 } else { 146 } else {
147 IrregexpPrepare(re, pattern, flags, parse_result.capture_count); 147 IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
148 } 148 }
149 ASSERT(re->data()->IsFixedArray()); 149 ASSERT(re->data()->IsFixedArray());
150 // Compilation succeeded so the data is set on the regexp 150 // Compilation succeeded so the data is set on the regexp
151 // and we can store it in the cache. 151 // and we can store it in the cache.
152 Handle<FixedArray> data(FixedArray::cast(re->data())); 152 Handle<FixedArray> data(FixedArray::cast(re->data()));
153 CompilationCache::PutRegExp(pattern, flags, data); 153 CompilationCache::PutRegExp(pattern, flags, data);
154 154
155 return re; 155 return re;
156 } 156 }
157 157
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after
329 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { 329 ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {
330 return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii))); 330 return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));
331 } 331 }
332 332
333 333
334 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { 334 Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
335 return Code::cast(re->get(JSRegExp::code_index(is_ascii))); 335 return Code::cast(re->get(JSRegExp::code_index(is_ascii)));
336 } 336 }
337 337
338 338
339 void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, 339 void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
340 Handle<String> pattern, 340 Handle<String> pattern,
341 JSRegExp::Flags flags, 341 JSRegExp::Flags flags,
342 int capture_count) { 342 int capture_count) {
343 // Initialize compiled code entries to null. 343 // Initialize compiled code entries to null.
344 Factory::SetRegExpIrregexpData(re, 344 Factory::SetRegExpIrregexpData(re,
345 JSRegExp::IRREGEXP, 345 JSRegExp::IRREGEXP,
346 pattern, 346 pattern,
347 flags, 347 flags,
348 capture_count); 348 capture_count);
349 } 349 }
350 350
351 351
352 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
353 Handle<String> subject) {
354 if (!subject->IsFlat()) {
355 FlattenString(subject);
356 }
357 bool is_ascii = subject->IsAsciiRepresentation();
358 if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
359 return -1;
360 }
361 #ifdef V8_NATIVE_REGEXP
362 // Native regexp only needs room to output captures. Registers are handled
363 // internally.
364 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
365 #else // !V8_NATIVE_REGEXP
366 // Byte-code regexp needs space allocated for all its registers.
367 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
368 #endif // V8_NATIVE_REGEXP
369 }
370
371
372 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
373 Handle<String> subject,
374 int index,
375 Vector<int> output) {
376 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));
377
378 ASSERT(index >= 0);
379 ASSERT(index <= subject->length());
380 ASSERT(subject->IsFlat());
381
382 #ifdef V8_NATIVE_REGEXP
383 ASSERT(output.length() >=
384 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
385 do {
386 bool is_ascii = subject->IsAsciiRepresentation();
387 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii));
388 NativeRegExpMacroAssembler::Result res =
389 NativeRegExpMacroAssembler::Match(code,
390 subject,
391 output.start(),
392 output.length(),
393 index);
394 if (res != NativeRegExpMacroAssembler::RETRY) {
395 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||
396 Top::has_pending_exception());
397 STATIC_ASSERT(
398 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
399 STATIC_ASSERT(
400 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
401 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
402 == RE_EXCEPTION);
403 return static_cast<IrregexpResult>(res);
404 }
405 // If result is RETRY, the string has changed representation, and we
406 // must restart from scratch.
407 // In this case, it means we must make sure we are prepared to handle
408 // the, potentially, different subject (the string can switch between
409 // being internal and external, and even between being ASCII and UC16,
410 // but the characters are always the same).
411 IrregexpPrepare(regexp, subject);
412 } while (true);
413 UNREACHABLE();
414 return RE_EXCEPTION;
415 #else // ndef V8_NATIVE_REGEXP
416
417 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
418 bool is_ascii = subject->IsAsciiRepresentation();
419 // We must have done EnsureCompiledIrregexp, so we can get the number of
420 // registers.
421 int* register_vector = output.start();
422 int number_of_capture_registers =
423 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
424 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
425 register_vector[i] = -1;
426 }
427 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii));
428
429 if (IrregexpInterpreter::Match(byte_codes,
430 subject,
431 register_vector,
432 index)) {
433 return RE_SUCCESS;
434 }
435 return RE_FAILURE;
436 #endif // ndef V8_NATIVE_REGEXP
437 }
438
439
352 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, 440 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
353 Handle<String> subject, 441 Handle<String> subject,
354 int previous_index, 442 int previous_index,
355 Handle<JSArray> last_match_info) { 443 Handle<JSArray> last_match_info) {
356 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); 444 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
357 445
358 // Prepare space for the return values. 446 // Prepare space for the return values.
359 int number_of_capture_registers =
360 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
361
362 #ifndef V8_NATIVE_REGEXP 447 #ifndef V8_NATIVE_REGEXP
363 #ifdef DEBUG 448 #ifdef DEBUG
364 if (FLAG_trace_regexp_bytecodes) { 449 if (FLAG_trace_regexp_bytecodes) {
365 String* pattern = jsregexp->Pattern(); 450 String* pattern = jsregexp->Pattern();
366 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 451 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
367 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 452 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
368 } 453 }
369 #endif 454 #endif
370 #endif 455 #endif
371 456 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);
372 if (!subject->IsFlat()) { 457 if (required_registers < 0) {
373 FlattenString(subject); 458 // Compiling failed with an exception.
374 }
375
376 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
377
378 Handle<FixedArray> array;
379
380 // Dispatch to the correct RegExp implementation.
381 Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
382
383 #ifdef V8_NATIVE_REGEXP
384
385 OffsetsVector captures(number_of_capture_registers);
386 int* captures_vector = captures.vector();
387 NativeRegExpMacroAssembler::Result res;
388 do {
389 bool is_ascii = subject->IsAsciiRepresentation();
390 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
391 return Handle<Object>::null();
392 }
393 Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
394 res = NativeRegExpMacroAssembler::Match(code,
395 subject,
396 captures_vector,
397 captures.length(),
398 previous_index);
399 // If result is RETRY, the string have changed representation, and we
400 // must restart from scratch.
401 } while (res == NativeRegExpMacroAssembler::RETRY);
402 if (res == NativeRegExpMacroAssembler::EXCEPTION) {
403 ASSERT(Top::has_pending_exception()); 459 ASSERT(Top::has_pending_exception());
404 return Handle<Object>::null(); 460 return Handle<Object>::null();
405 } 461 }
406 ASSERT(res == NativeRegExpMacroAssembler::SUCCESS
407 || res == NativeRegExpMacroAssembler::FAILURE);
408 462
409 if (res != NativeRegExpMacroAssembler::SUCCESS) return Factory::null_value(); 463 OffsetsVector registers(required_registers);
410 464
411 array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements())); 465 IrregexpResult res = IrregexpExecOnce(jsregexp,
412 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); 466 subject,
413 // The captures come in (start, end+1) pairs. 467 previous_index,
414 for (int i = 0; i < number_of_capture_registers; i += 2) { 468 Vector<int>(registers.vector(),
415 // Capture values are relative to start_offset only. 469 registers.length()));
416 // Convert them to be relative to start of string. 470 if (res == RE_SUCCESS) {
417 if (captures_vector[i] >= 0) { 471 int capture_register_count =
418 captures_vector[i] += previous_index; 472 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
473 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
474 AssertNoAllocation no_gc;
475 int* register_vector = registers.vector();
476 FixedArray* array = FixedArray::cast(last_match_info->elements());
477 for (int i = 0; i < capture_register_count; i += 2) {
478 SetCapture(array, i, register_vector[i]);
479 SetCapture(array, i + 1, register_vector[i + 1]);
419 } 480 }
420 if (captures_vector[i + 1] >= 0) { 481 SetLastCaptureCount(array, capture_register_count);
421 captures_vector[i + 1] += previous_index; 482 SetLastSubject(array, *subject);
422 } 483 SetLastInput(array, *subject);
423 SetCapture(*array, i, captures_vector[i]); 484 return last_match_info;
424 SetCapture(*array, i + 1, captures_vector[i + 1]);
425 } 485 }
426 486 if (res == RE_EXCEPTION) {
427 #else // ! V8_NATIVE_REGEXP 487 ASSERT(Top::has_pending_exception());
428
429 bool is_ascii = subject->IsAsciiRepresentation();
430 if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
431 return Handle<Object>::null(); 488 return Handle<Object>::null();
432 } 489 }
433 // Now that we have done EnsureCompiledIrregexp we can get the number of 490 ASSERT(res == RE_FAILURE);
434 // registers. 491 return Factory::null_value();
435 int number_of_registers =
436 IrregexpNumberOfRegisters(FixedArray::cast(jsregexp->data()));
437 OffsetsVector registers(number_of_registers);
438 int* register_vector = registers.vector();
439 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
440 register_vector[i] = -1;
441 }
442 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii));
443
444 if (!IrregexpInterpreter::Match(byte_codes,
445 subject,
446 register_vector,
447 previous_index)) {
448 return Factory::null_value();
449 }
450
451 array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements()));
452 ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
453 // The captures come in (start, end+1) pairs.
454 for (int i = 0; i < number_of_capture_registers; i += 2) {
455 SetCapture(*array, i, register_vector[i]);
456 SetCapture(*array, i + 1, register_vector[i + 1]);
457 }
458
459 #endif // V8_NATIVE_REGEXP
460
461 SetLastCaptureCount(*array, number_of_capture_registers);
462 SetLastSubject(*array, *subject);
463 SetLastInput(*array, *subject);
464
465 return last_match_info;
466 } 492 }
467 493
468 494
469 // ------------------------------------------------------------------- 495 // -------------------------------------------------------------------
470 // Implementation of the Irregexp regular expression engine. 496 // Implementation of the Irregexp regular expression engine.
471 // 497 //
472 // The Irregexp regular expression engine is intended to be a complete 498 // The Irregexp regular expression engine is intended to be a complete
473 // implementation of ECMAScript regular expressions. It generates either 499 // implementation of ECMAScript regular expressions. It generates either
474 // bytecodes or native code. 500 // bytecodes or native code.
475 501
(...skipping 4744 matching lines...) Expand 10 before | Expand all | Expand 10 after
5220 node, 5246 node,
5221 data->capture_count, 5247 data->capture_count,
5222 pattern); 5248 pattern);
5223 } 5249 }
5224 5250
5225 5251
5226 int OffsetsVector::static_offsets_vector_[ 5252 int OffsetsVector::static_offsets_vector_[
5227 OffsetsVector::kStaticOffsetsVectorSize]; 5253 OffsetsVector::kStaticOffsetsVectorSize];
5228 5254
5229 }} // namespace v8::internal 5255 }} // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698