Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(394)

Side by Side Diff: src/jsregexp.cc

Issue 28311: Flattened the representation of compiled RegExp data. (Closed)
Patch Set: ... and it lints. Created 11 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
206 Handle<String> pattern, 206 Handle<String> pattern,
207 Handle<String> flag_str) { 207 Handle<String> flag_str) {
208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); 208 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); 209 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);
210 bool in_cache = !cached.is_null(); 210 bool in_cache = !cached.is_null();
211 LOG(RegExpCompileEvent(re, in_cache)); 211 LOG(RegExpCompileEvent(re, in_cache));
212 212
213 Handle<Object> result; 213 Handle<Object> result;
214 if (in_cache) { 214 if (in_cache) {
215 re->set_data(*cached); 215 re->set_data(*cached);
216 result = re; 216 return re;
217 } else { 217 }
218 FlattenString(pattern); 218 FlattenString(pattern);
219 ZoneScope zone_scope(DELETE_ON_EXIT); 219 ZoneScope zone_scope(DELETE_ON_EXIT);
220 RegExpCompileData parse_result; 220 RegExpCompileData parse_result;
221 FlatStringReader reader(pattern); 221 FlatStringReader reader(pattern);
222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { 222 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
223 // Throw an exception if we fail to parse the pattern. 223 // Throw an exception if we fail to parse the pattern.
224 ThrowRegExpException(re, 224 ThrowRegExpException(re,
225 pattern, 225 pattern,
226 parse_result.error, 226 parse_result.error,
227 "malformed_regexp"); 227 "malformed_regexp");
228 return Handle<Object>::null(); 228 return Handle<Object>::null();
229 }
230
231 if (parse_result.simple && !flags.is_ignore_case()) {
232 // Parse-tree is a single atom that is equal to the pattern.
233 result = AtomCompile(re, pattern, flags, pattern);
234 } else if (parse_result.tree->IsAtom() &&
235 !flags.is_ignore_case() &&
236 parse_result.capture_count == 0) {
237 RegExpAtom* atom = parse_result.tree->AsAtom();
238 Vector<const uc16> atom_pattern = atom->data();
239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
240 result = AtomCompile(re, pattern, flags, atom_string);
241 } else {
242 result = IrregexpPrepare(re, pattern, flags);
243 }
244 Object* data = re->data();
245 if (data->IsFixedArray()) {
246 // If compilation succeeded then the data is set on the regexp
247 // and we can store it in the cache.
248 Handle<FixedArray> data(FixedArray::cast(re->data()));
249 CompilationCache::PutRegExp(pattern, flags, data);
250 }
251 } 229 }
252 230
253 return result; 231 if (parse_result.simple && !flags.is_ignore_case()) {
232 // Parse-tree is a single atom that is equal to the pattern.
233 AtomCompile(re, pattern, flags, pattern);
234 } else if (parse_result.tree->IsAtom() &&
235 !flags.is_ignore_case() &&
236 parse_result.capture_count == 0) {
237 RegExpAtom* atom = parse_result.tree->AsAtom();
238 Vector<const uc16> atom_pattern = atom->data();
239 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
240 AtomCompile(re, pattern, flags, atom_string);
241 } else {
242 IrregexpPrepare(re, pattern, flags, parse_result.capture_count);
243 }
244 ASSERT(re->data()->IsFixedArray());
245 // Compilation succeeded so the data is set on the regexp
246 // and we can store it in the cache.
247 Handle<FixedArray> data(FixedArray::cast(re->data()));
248 CompilationCache::PutRegExp(pattern, flags, data);
249
250 return re;
254 } 251 }
255 252
256 253
257 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, 254 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
258 Handle<String> subject, 255 Handle<String> subject,
259 int index, 256 int index,
260 Handle<JSArray> last_match_info) { 257 Handle<JSArray> last_match_info) {
261 switch (regexp->TypeTag()) { 258 switch (regexp->TypeTag()) {
262 case JSRegExp::ATOM: 259 case JSRegExp::ATOM:
263 return AtomExec(regexp, subject, index, last_match_info); 260 return AtomExec(regexp, subject, index, last_match_info);
264 case JSRegExp::IRREGEXP: { 261 case JSRegExp::IRREGEXP: {
265 Handle<Object> result = 262 Handle<Object> result =
266 IrregexpExec(regexp, subject, index, last_match_info); 263 IrregexpExec(regexp, subject, index, last_match_info);
267 ASSERT(!result.is_null() || Top::has_pending_exception()); 264 ASSERT(!result.is_null() || Top::has_pending_exception());
268 return result; 265 return result;
269 } 266 }
270 default: 267 default:
271 UNREACHABLE(); 268 UNREACHABLE();
272 return Handle<Object>::null(); 269 return Handle<Object>::null();
273 } 270 }
274 } 271 }
275 272
276 273
277 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, 274 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
278 Handle<String> subject, 275 Handle<String> subject,
279 Handle<JSArray> last_match_info) { 276 Handle<JSArray> last_match_info) {
280 switch (regexp->TypeTag()) { 277 switch (regexp->TypeTag()) {
281 case JSRegExp::ATOM: 278 case JSRegExp::ATOM:
282 return AtomExecGlobal(regexp, subject, last_match_info); 279 return AtomExecGlobal(regexp, subject, last_match_info);
283 case JSRegExp::IRREGEXP: { 280 case JSRegExp::IRREGEXP: {
284 Handle<Object> result = 281 Handle<Object> result =
285 IrregexpExecGlobal(regexp, subject, last_match_info); 282 IrregexpExecGlobal(regexp, subject, last_match_info);
286 ASSERT(!result.is_null() || Top::has_pending_exception()); 283 ASSERT(!result.is_null() || Top::has_pending_exception());
287 return result; 284 return result;
288 } 285 }
289 default: 286 default:
290 UNREACHABLE(); 287 UNREACHABLE();
291 return Handle<Object>::null(); 288 return Handle<Object>::null();
292 } 289 }
293 } 290 }
294 291
295 292
296 // RegExp Atom implementation: Simple string search using indexOf. 293 // RegExp Atom implementation: Simple string search using indexOf.
297 294
298 295
299 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re, 296 void RegExpImpl::AtomCompile(Handle<JSRegExp> re,
300 Handle<String> pattern, 297 Handle<String> pattern,
301 JSRegExp::Flags flags, 298 JSRegExp::Flags flags,
302 Handle<String> match_pattern) { 299 Handle<String> match_pattern) {
303 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern); 300 Factory::SetRegExpAtomData(re,
304 return re; 301 JSRegExp::ATOM,
302 pattern,
303 flags,
304 match_pattern);
305 } 305 }
306 306
307 307
308 static void SetAtomLastCapture(FixedArray* array, 308 static void SetAtomLastCapture(FixedArray* array,
309 String* subject, 309 String* subject,
310 int from, 310 int from,
311 int to) { 311 int to) {
312 RegExpImpl::SetLastCaptureCount(array, 2); 312 RegExpImpl::SetLastCaptureCount(array, 2);
313 RegExpImpl::SetLastSubject(array, subject); 313 RegExpImpl::SetLastSubject(array, subject);
314 RegExpImpl::SetLastInput(array, subject); 314 RegExpImpl::SetLastInput(array, subject);
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
379 if (needle_length == 0) index++; 379 if (needle_length == 0) index++;
380 last_value = value; 380 last_value = value;
381 } 381 }
382 return result; 382 return result;
383 } 383 }
384 384
385 385
386 // Irregexp implementation. 386 // Irregexp implementation.
387 387
388 388
389 // Retrieves a compiled version of the regexp for either ASCII or non-ASCII 389 // Ensures that the regexp object containst a compiled version of the
Erik Corry 2009/03/02 12:01:48 spolling!
Lasse Reichstein 2009/03/02 13:54:48 Done.
390 // strings. If the compiled version doesn't already exist, it is compiled 390 // source for either ASCII or non-ASCII strings.
391 // If the compiled version doesn't already exist, it is compiled
391 // from the source pattern. 392 // from the source pattern.
392 // Irregexp is not feature complete yet. If there is something in the 393 // If compilation fails, an exception is thrown and this function
393 // regexp that the compiler cannot currently handle, an empty 394 // returns false.
394 // handle is returned, but no exception is thrown. 395 bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
395 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re, 396 bool is_ascii) {
396 bool is_ascii) { 397 int index;
397 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); 398 if (is_ascii) {
398 Handle<FixedArray> alternatives( 399 index = JSRegExp::kIrregexpASCIICodeIndex;
399 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); 400 } else {
400 ASSERT_EQ(2, alternatives->length()); 401 index = JSRegExp::kIrregexpUC16CodeIndex;
401 402 }
402 int index = is_ascii ? 0 : 1; 403 Object* entry = re->DataAt(index);
403 Object* entry = alternatives->get(index); 404 if (!entry->IsTheHole()) {
404 if (!entry->IsNull()) { 405 if (entry->IsJSObject()) {
405 return Handle<FixedArray>(FixedArray::cast(entry)); 406 Top::Throw(entry);
407 return false;
408 }
409 return true;
406 } 410 }
407 411
408 // Compile the RegExp. 412 // Compile the RegExp.
409 ZoneScope zone_scope(DELETE_ON_EXIT); 413 ZoneScope zone_scope(DELETE_ON_EXIT);
410 414
411 JSRegExp::Flags flags = re->GetFlags(); 415 JSRegExp::Flags flags = re->GetFlags();
412 416
413 Handle<String> pattern(re->Pattern()); 417 Handle<String> pattern(re->Pattern());
414 if (!pattern->IsFlat(StringShape(*pattern))) { 418 if (!pattern->IsFlat(StringShape(*pattern))) {
415 FlattenString(pattern); 419 FlattenString(pattern);
416 } 420 }
417 421
418 RegExpCompileData compile_data; 422 RegExpCompileData compile_data;
419 FlatStringReader reader(pattern); 423 FlatStringReader reader(pattern);
420 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { 424 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) {
421 // Throw an exception if we fail to parse the pattern. 425 // Throw an exception if we fail to parse the pattern.
422 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. 426 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once.
423 ThrowRegExpException(re, 427 ThrowRegExpException(re,
424 pattern, 428 pattern,
425 compile_data.error, 429 compile_data.error,
426 "malformed_regexp"); 430 "malformed_regexp");
427 return Handle<FixedArray>::null(); 431 return false;
428 } 432 }
429 Handle<FixedArray> compiled_entry = 433 RegExpEngine::CompilationResult result =
430 RegExpEngine::Compile(&compile_data, 434 RegExpEngine::Compile(&compile_data,
431 flags.is_ignore_case(), 435 flags.is_ignore_case(),
432 flags.is_multiline(), 436 flags.is_multiline(),
433 pattern, 437 pattern,
434 is_ascii); 438 is_ascii);
435 if (!compiled_entry.is_null()) { 439 if (result.error_message != NULL) {
436 alternatives->set(index, *compiled_entry); 440 // Unable to compile regexp.
441 Handle<JSArray> array = Factory::NewJSArray(2);
442 SetElement(array, 0, pattern);
443 SetElement(array,
444 1,
445 Factory::NewStringFromUtf8(CStrVector(result.error_message)));
446 Handle<Object> regexp_err =
447 Factory::NewSyntaxError("malformed_regexp", array);
448 Top::Throw(*regexp_err);
449 re->SetDataAt(index, *regexp_err);
450 return false;
437 } 451 }
438 return compiled_entry; 452
453 Handle<FixedArray> data(FixedArray::cast(re->data()));
454 data->set(index, result.code);
455 int register_max = IrregexpMaxRegisterCount(data);
456 if (result.num_registers > register_max) {
457 SetIrregexpMaxRegisterCount(data, result.num_registers);
458 }
459
460 return true;
439 } 461 }
440 462
441 463
442 int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) { 464 int RegExpImpl::IrregexpMaxRegisterCount(Handle<FixedArray> re) {
443 return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value(); 465 return Smi::cast(
466 re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
444 } 467 }
445 468
446 469
447 int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) { 470 void RegExpImpl::SetIrregexpMaxRegisterCount(Handle<FixedArray> re, int value) {
448 return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value(); 471 re->set(JSRegExp::kIrregexpMaxRegisterCountIndex,
472 Smi::FromInt(value));
449 } 473 }
450 474
451 475
452 Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) { 476 int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> re) {
453 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() 477 return Smi::cast(
454 == RegExpMacroAssembler::kBytecodeImplementation); 478 re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
455 return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex)));
456 } 479 }
457 480
458 481
459 Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) { 482 int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> re) {
Erik Corry 2009/03/02 12:01:48 This looks like exactly the same as IrregexpMaxReg
Lasse Reichstein 2009/03/02 13:54:48 It is. Or it was, now it's gone.
460 ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() 483 return Smi::cast(
461 != RegExpMacroAssembler::kBytecodeImplementation); 484 re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
462 return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex)));
463 } 485 }
464 486
465 487
466 Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, 488 Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> re,
467 Handle<String> pattern, 489 bool is_ascii) {
468 JSRegExp::Flags flags) { 490 int index;
469 // Make space for ASCII and UC16 versions. 491 if (is_ascii) {
470 Handle<FixedArray> alternatives = Factory::NewFixedArray(2); 492 index = JSRegExp::kIrregexpASCIICodeIndex;
471 alternatives->set_null(0); 493 } else {
472 alternatives->set_null(1); 494 index = JSRegExp::kIrregexpUC16CodeIndex;
473 Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives); 495 }
474 return re; 496 Object* value = re->get(index);
497 ASSERT(value->IsByteArray());
498 return Handle<ByteArray>(ByteArray::cast(value));
475 } 499 }
476 500
477 501
502 Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> re,
503 bool is_ascii) {
504 int index;
505 if (is_ascii) {
506 index = JSRegExp::kIrregexpASCIICodeIndex;
507 } else {
508 index = JSRegExp::kIrregexpUC16CodeIndex;
509 }
510 Object* value = re->get(index);
511 ASSERT(value->IsCode());
512 return Handle<Code>(Code::cast(value));
513 }
514
515
516 void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
517 Handle<String> pattern,
518 JSRegExp::Flags flags,
519 int capture_count) {
520 // Initialize compiled code entries to null.
521 Factory::SetRegExpIrregexpData(re,
522 JSRegExp::IRREGEXP,
523 pattern,
524 flags,
525 capture_count);
526 }
527
528
478 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, 529 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
479 Handle<String> subject, 530 Handle<String> subject,
480 int index, 531 int index,
481 Handle<JSArray> last_match_info) { 532 Handle<JSArray> last_match_info) {
482 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); 533 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
483 ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
484 534
485 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); 535 bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
486 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); 536 if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
487 if (irregexp.is_null()) {
488 // We can't handle the RegExp with IRRegExp.
489 return Handle<Object>::null(); 537 return Handle<Object>::null();
490 } 538 }
491 539
492 // Prepare space for the return values. 540 // Prepare space for the return values.
541 Handle<FixedArray> re_data(FixedArray::cast(regexp->data()));
493 int number_of_capture_registers = 542 int number_of_capture_registers =
494 (IrregexpNumberOfCaptures(irregexp) + 1) * 2; 543 (IrregexpNumberOfCaptures(re_data) + 1) * 2;
495 OffsetsVector offsets(number_of_capture_registers); 544 OffsetsVector offsets(number_of_capture_registers);
496 545
497 int previous_index = index; 546 int previous_index = index;
498 547
499 #ifdef DEBUG 548 #ifdef DEBUG
500 if (FLAG_trace_regexp_bytecodes) { 549 if (FLAG_trace_regexp_bytecodes) {
501 String* pattern = regexp->Pattern(); 550 String* pattern = regexp->Pattern();
502 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 551 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
503 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 552 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
504 } 553 }
505 #endif 554 #endif
506 555
507 if (!subject->IsFlat(StringShape(*subject))) { 556 if (!subject->IsFlat(StringShape(*subject))) {
508 FlattenString(subject); 557 FlattenString(subject);
509 } 558 }
510 559
511 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); 560 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
512 561
513 return IrregexpExecOnce(irregexp, 562 return IrregexpExecOnce(re_data,
514 number_of_capture_registers, 563 number_of_capture_registers,
515 last_match_info, 564 last_match_info,
516 subject, 565 subject,
517 previous_index, 566 previous_index,
518 offsets.vector(), 567 offsets.vector(),
519 offsets.length()); 568 offsets.length());
520 } 569 }
521 570
522 571
523 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, 572 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
524 Handle<String> subject, 573 Handle<String> subject,
525 Handle<JSArray> last_match_info) { 574 Handle<JSArray> last_match_info) {
526 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); 575 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
576 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));
527 577
528 bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); 578 bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
529 Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii); 579 if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
530 if (irregexp.is_null()) {
531 return Handle<Object>::null(); 580 return Handle<Object>::null();
532 } 581 }
533 582
534 // Prepare space for the return values. 583 // Prepare space for the return values.
535 int number_of_capture_registers = 584 int number_of_capture_registers =
536 (IrregexpNumberOfCaptures(irregexp) + 1) * 2; 585 (IrregexpNumberOfCaptures(irregexp) + 1) * 2;
537 OffsetsVector offsets(number_of_capture_registers); 586 OffsetsVector offsets(number_of_capture_registers);
538 587
539 int previous_index = 0; 588 int previous_index = 0;
540 589
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
598 647
599 } else { 648 } else {
600 ASSERT(matches->IsNull()); 649 ASSERT(matches->IsNull());
601 return result; 650 return result;
602 } 651 }
603 } 652 }
604 } 653 }
605 } 654 }
606 655
607 656
608 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, 657 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp,
609 int number_of_capture_registers, 658 int number_of_capture_registers,
610 Handle<JSArray> last_match_info, 659 Handle<JSArray> last_match_info,
611 Handle<String> subject, 660 Handle<String> subject,
612 int previous_index, 661 int previous_index,
613 int* offsets_vector, 662 int* offsets_vector,
614 int offsets_vector_length) { 663 int offsets_vector_length) {
615 ASSERT(subject->IsFlat(StringShape(*subject))); 664 StringShape shape(*subject);
665 ASSERT(subject->IsFlat(shape));
666 bool is_ascii = shape.IsAsciiRepresentation();
616 bool rc; 667 bool rc;
617 668
618 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); 669 if (FLAG_regexp_native) {
670 #ifndef ARM
671 Handle<Code> code(IrregexpNativeCode(regexp, is_ascii));
619 672
620 switch (tag) { 673 // Character offsets into string.
621 case RegExpMacroAssembler::kIA32Implementation: { 674 int start_offset = previous_index;
622 #ifndef ARM 675 int end_offset = subject->length(shape);
623 Handle<Code> code = IrregexpNativeCode(irregexp);
624 676
625 StringShape shape(*subject); 677 if (shape.IsCons()) {
678 subject = Handle<String>(ConsString::cast(*subject)->first());
679 } else if (shape.IsSliced()) {
680 SlicedString* slice = SlicedString::cast(*subject);
681 start_offset += slice->start();
682 end_offset += slice->start();
683 subject = Handle<String>(slice->buffer());
684 }
626 685
627 // Character offsets into string. 686 // String is now either Sequential or External
628 int start_offset = previous_index; 687 StringShape flatshape(*subject);
629 int end_offset = subject->length(shape); 688 bool is_ascii = flatshape.IsAsciiRepresentation();
689 int char_size_shift = is_ascii ? 0 : 1;
630 690
631 if (shape.IsCons()) { 691 RegExpMacroAssemblerIA32::Result res;
632 subject = Handle<String>(ConsString::cast(*subject)->first()); 692
633 } else if (shape.IsSliced()) { 693 if (flatshape.IsExternal()) {
634 SlicedString* slice = SlicedString::cast(*subject); 694 const byte* address;
635 start_offset += slice->start(); 695 if (is_ascii) {
636 end_offset += slice->start(); 696 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
637 subject = Handle<String>(slice->buffer()); 697 address = reinterpret_cast<const byte*>(ext->resource()->data());
698 } else {
699 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
700 address = reinterpret_cast<const byte*>(ext->resource()->data());
638 } 701 }
702 res = RegExpMacroAssemblerIA32::Execute(
703 *code,
704 const_cast<Address*>(&address),
705 start_offset << char_size_shift,
706 end_offset << char_size_shift,
707 offsets_vector,
708 previous_index == 0);
709 } else { // Sequential string
710 ASSERT(StringShape(*subject).IsSequential());
711 Address char_address =
712 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
713 : SeqTwoByteString::cast(*subject)->GetCharsAddress();
714 int byte_offset = char_address - reinterpret_cast<Address>(*subject);
715 res = RegExpMacroAssemblerIA32::Execute(
716 *code,
717 reinterpret_cast<Address*>(subject.location()),
718 byte_offset + (start_offset << char_size_shift),
719 byte_offset + (end_offset << char_size_shift),
720 offsets_vector,
721 previous_index == 0);
722 }
639 723
640 // String is now either Sequential or External 724 if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
641 StringShape flatshape(*subject); 725 ASSERT(Top::has_pending_exception());
642 bool is_ascii = flatshape.IsAsciiRepresentation(); 726 return Handle<Object>::null();
643 int char_size_shift = is_ascii ? 0 : 1; 727 }
728 rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
644 729
645 RegExpMacroAssemblerIA32::Result res; 730 if (rc) {
646 731 // Capture values are relative to start_offset only.
647 if (flatshape.IsExternal()) { 732 for (int i = 0; i < offsets_vector_length; i++) {
648 const byte* address; 733 if (offsets_vector[i] >= 0) {
649 if (is_ascii) { 734 offsets_vector[i] += previous_index;
650 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
651 address = reinterpret_cast<const byte*>(ext->resource()->data());
652 } else {
653 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
654 address = reinterpret_cast<const byte*>(ext->resource()->data());
655 }
656 res = RegExpMacroAssemblerIA32::Execute(
657 *code,
658 const_cast<Address*>(&address),
659 start_offset << char_size_shift,
660 end_offset << char_size_shift,
661 offsets_vector,
662 previous_index == 0);
663 } else { // Sequential string
664 ASSERT(StringShape(*subject).IsSequential());
665 Address char_address =
666 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
667 : SeqTwoByteString::cast(*subject)->GetCharsAddress();
668 int byte_offset = char_address - reinterpret_cast<Address>(*subject);
669 res = RegExpMacroAssemblerIA32::Execute(
670 *code,
671 reinterpret_cast<Address*>(subject.location()),
672 byte_offset + (start_offset << char_size_shift),
673 byte_offset + (end_offset << char_size_shift),
674 offsets_vector,
675 previous_index == 0);
676 }
677
678 if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
679 ASSERT(Top::has_pending_exception());
680 return Handle<Object>::null();
681 }
682 rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
683
684 if (rc) {
685 // Capture values are relative to start_offset only.
686 for (int i = 0; i < offsets_vector_length; i++) {
687 if (offsets_vector[i] >= 0) {
688 offsets_vector[i] += previous_index;
689 }
690 } 735 }
691 } 736 }
692 break; 737 }
693 #else 738 #else
694 UNIMPLEMENTED(); 739 UNIMPLEMENTED();
695 rc = false; 740 rc = false;
696 break;
697 #endif 741 #endif
742 } else {
743 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
744 offsets_vector[i] = -1;
698 } 745 }
699 case RegExpMacroAssembler::kBytecodeImplementation: { 746 Handle<ByteArray> byte_codes = IrregexpByteCode(regexp, is_ascii);
700 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
701 offsets_vector[i] = -1;
702 }
703 Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);
704 747
705 rc = IrregexpInterpreter::Match(byte_codes, 748 rc = IrregexpInterpreter::Match(byte_codes,
706 subject, 749 subject,
707 offsets_vector, 750 offsets_vector,
708 previous_index); 751 previous_index);
709 break; 752 rc = true;
710 }
711 case RegExpMacroAssembler::kARMImplementation:
712 default:
713 UNREACHABLE();
714 rc = false;
715 break;
716 } 753 }
717 754
718 if (!rc) { 755 if (!rc) {
719 return Factory::null_value(); 756 return Factory::null_value();
720 } 757 }
721 758
722 Handle<FixedArray> array(last_match_info->elements()); 759 // TODO(lrn): How do we know the array is long enough.
Erik Corry 2009/03/02 12:01:48 Because we called EnsureSize at all the places whe
Lasse Reichstein 2009/03/02 13:54:48 Done.
760 FixedArray* array = last_match_info->elements();
723 // The captures come in (start, end+1) pairs. 761 // The captures come in (start, end+1) pairs.
724 for (int i = 0; i < number_of_capture_registers; i += 2) { 762 for (int i = 0; i < number_of_capture_registers; i += 2) {
725 SetCapture(*array, i, offsets_vector[i]); 763 SetCapture(array, i, offsets_vector[i]);
726 SetCapture(*array, i + 1, offsets_vector[i + 1]); 764 SetCapture(array, i + 1, offsets_vector[i + 1]);
727 } 765 }
728 SetLastCaptureCount(*array, number_of_capture_registers); 766 SetLastCaptureCount(array, number_of_capture_registers);
729 SetLastSubject(*array, *subject); 767 SetLastSubject(array, *subject);
730 SetLastInput(*array, *subject); 768 SetLastInput(array, *subject);
731 return last_match_info; 769 return last_match_info;
732 } 770 }
733 771
734 772
735 // ------------------------------------------------------------------- 773 // -------------------------------------------------------------------
736 // Implmentation of the Irregexp regular expression engine. 774 // Implmentation of the Irregexp regular expression engine.
Erik Corry 2009/03/02 12:01:48 Spolling! (Not yours...)
737 // 775 //
738 // The Irregexp regular expression engine is intended to be a complete 776 // The Irregexp regular expression engine is intended to be a complete
739 // implementation of ECMAScript regular expressions. It generates either 777 // implementation of ECMAScript regular expressions. It generates either
740 // bytecodes or native code. 778 // bytecodes or native code.
741 779
742 // The Irregexp regexp engine is structured in three steps. 780 // The Irregexp regexp engine is structured in three steps.
743 // 1) The parser generates an abstract syntax tree. See ast.cc. 781 // 1) The parser generates an abstract syntax tree. See ast.cc.
744 // 2) From the AST a node network is created. The nodes are all 782 // 2) From the AST a node network is created. The nodes are all
745 // subclasses of RegExpNode. The nodes represent states when 783 // subclasses of RegExpNode. The nodes represent states when
746 // executing a regular expression. Several optimizations are 784 // executing a regular expression. Several optimizations are
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after
943 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); 981 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii);
944 982
945 int AllocateRegister() { 983 int AllocateRegister() {
946 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { 984 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
947 reg_exp_too_big_ = true; 985 reg_exp_too_big_ = true;
948 return next_register_; 986 return next_register_;
949 } 987 }
950 return next_register_++; 988 return next_register_++;
951 } 989 }
952 990
953 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, 991 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
954 RegExpNode* start, 992 RegExpNode* start,
955 int capture_count, 993 int capture_count,
956 Handle<String> pattern); 994 Handle<String> pattern);
957 995
958 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } 996 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
959 997
960 static const int kImplementationOffset = 0; 998 static const int kImplementationOffset = 0;
961 static const int kNumberOfRegistersOffset = 0; 999 static const int kNumberOfRegistersOffset = 0;
962 static const int kCodeOffset = 1; 1000 static const int kCodeOffset = 1;
963 1001
964 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 1002 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
965 EndNode* accept() { return accept_; } 1003 EndNode* accept() { return accept_; }
966 1004
(...skipping 24 matching lines...) Expand all
991 public: 1029 public:
992 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 1030 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
993 compiler->IncrementRecursionDepth(); 1031 compiler->IncrementRecursionDepth();
994 } 1032 }
995 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 1033 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
996 private: 1034 private:
997 RegExpCompiler* compiler_; 1035 RegExpCompiler* compiler_;
998 }; 1036 };
999 1037
1000 1038
1001 static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) { 1039 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
1002 Handle<JSArray> array = Factory::NewJSArray(2); 1040 return RegExpEngine::CompilationResult("RegExp too big");
1003 SetElement(array, 0, pattern);
1004 const char* message = "RegExp too big";
1005 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));
1006 Handle<Object> regexp_err =
1007 Factory::NewSyntaxError("malformed_regexp", array);
1008 Top::Throw(*regexp_err);
1009 return Handle<FixedArray>();
1010 } 1041 }
1011 1042
1012 1043
1013 // Attempts to compile the regexp using an Irregexp code generator. Returns 1044 // Attempts to compile the regexp using an Irregexp code generator. Returns
1014 // a fixed array or a null handle depending on whether it succeeded. 1045 // a fixed array or a null handle depending on whether it succeeded.
1015 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) 1046 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii)
1016 : next_register_(2 * (capture_count + 1)), 1047 : next_register_(2 * (capture_count + 1)),
1017 work_list_(NULL), 1048 work_list_(NULL),
1018 recursion_depth_(0), 1049 recursion_depth_(0),
1019 ignore_case_(ignore_case), 1050 ignore_case_(ignore_case),
1020 ascii_(ascii), 1051 ascii_(ascii),
1021 reg_exp_too_big_(false) { 1052 reg_exp_too_big_(false) {
1022 accept_ = new EndNode(EndNode::ACCEPT); 1053 accept_ = new EndNode(EndNode::ACCEPT);
1023 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); 1054 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
1024 } 1055 }
1025 1056
1026 1057
1027 Handle<FixedArray> RegExpCompiler::Assemble( 1058 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
1028 RegExpMacroAssembler* macro_assembler, 1059 RegExpMacroAssembler* macro_assembler,
1029 RegExpNode* start, 1060 RegExpNode* start,
1030 int capture_count, 1061 int capture_count,
1031 Handle<String> pattern) { 1062 Handle<String> pattern) {
1032 #ifdef DEBUG 1063 #ifdef DEBUG
1033 if (FLAG_trace_regexp_assembler) 1064 if (FLAG_trace_regexp_assembler)
1034 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); 1065 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
1035 else 1066 else
1036 #endif 1067 #endif
1037 macro_assembler_ = macro_assembler; 1068 macro_assembler_ = macro_assembler;
1038 List <RegExpNode*> work_list(0); 1069 List <RegExpNode*> work_list(0);
1039 work_list_ = &work_list; 1070 work_list_ = &work_list;
1040 Label fail; 1071 Label fail;
1041 macro_assembler_->PushBacktrack(&fail); 1072 macro_assembler_->PushBacktrack(&fail);
1042 Trace new_trace; 1073 Trace new_trace;
1043 start->Emit(this, &new_trace); 1074 start->Emit(this, &new_trace);
1044 macro_assembler_->Bind(&fail); 1075 macro_assembler_->Bind(&fail);
1045 macro_assembler_->Fail(); 1076 macro_assembler_->Fail();
1046 while (!work_list.is_empty()) { 1077 while (!work_list.is_empty()) {
1047 work_list.RemoveLast()->Emit(this, &new_trace); 1078 work_list.RemoveLast()->Emit(this, &new_trace);
1048 } 1079 }
1049 if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern); 1080 if (reg_exp_too_big_) return IrregexpRegExpTooBig();
1050 Handle<FixedArray> array = 1081
1051 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
1052 array->set(RegExpImpl::kIrregexpImplementationIndex,
1053 Smi::FromInt(macro_assembler_->Implementation()));
1054 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex,
1055 Smi::FromInt(next_register_));
1056 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,
1057 Smi::FromInt(capture_count));
1058 Handle<Object> code = macro_assembler_->GetCode(pattern); 1082 Handle<Object> code = macro_assembler_->GetCode(pattern);
1059 array->set(RegExpImpl::kIrregexpCodeIndex, *code); 1083
1060 work_list_ = NULL; 1084 work_list_ = NULL;
1061 #ifdef DEBUG 1085 #ifdef DEBUG
1062 if (FLAG_trace_regexp_assembler) { 1086 if (FLAG_trace_regexp_assembler) {
1063 delete macro_assembler_; 1087 delete macro_assembler_;
1064 } 1088 }
1065 #endif 1089 #endif
1066 return array; 1090 return RegExpEngine::CompilationResult(*code, next_register_);
1067 } 1091 }
1068 1092
1069 1093
1070 bool Trace::DeferredAction::Mentions(int that) { 1094 bool Trace::DeferredAction::Mentions(int that) {
1071 if (type() == ActionNode::CLEAR_CAPTURES) { 1095 if (type() == ActionNode::CLEAR_CAPTURES) {
1072 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); 1096 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
1073 return range.Contains(that); 1097 return range.Contains(that);
1074 } else { 1098 } else {
1075 return reg() == that; 1099 return reg() == that;
1076 } 1100 }
(...skipping 3563 matching lines...) Expand 10 before | Expand all | Expand 10 after
4640 } 4664 }
4641 } 4665 }
4642 4666
4643 4667
4644 void DispatchTableConstructor::VisitAction(ActionNode* that) { 4668 void DispatchTableConstructor::VisitAction(ActionNode* that) {
4645 RegExpNode* target = that->on_success(); 4669 RegExpNode* target = that->on_success();
4646 target->Accept(this); 4670 target->Accept(this);
4647 } 4671 }
4648 4672
4649 4673
4650 Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data, 4674 RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
4651 bool ignore_case, 4675 bool ignore_case,
4652 bool is_multiline, 4676 bool is_multiline,
4653 Handle<String> pattern, 4677 Handle<String> pattern,
4654 bool is_ascii) { 4678 bool is_ascii) {
4655 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { 4679 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
4656 return IrregexpRegExpTooBig(pattern); 4680 return IrregexpRegExpTooBig();
4657 } 4681 }
4658 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); 4682 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);
4659 // Wrap the body of the regexp in capture #0. 4683 // Wrap the body of the regexp in capture #0.
4660 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, 4684 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
4661 0, 4685 0,
4662 &compiler, 4686 &compiler,
4663 compiler.accept()); 4687 compiler.accept());
4664 RegExpNode* node = captured_body; 4688 RegExpNode* node = captured_body;
4665 if (!data->tree->IsAnchored()) { 4689 if (!data->tree->IsAnchored()) {
4666 // Add a .*? at the beginning, outside the body capture, unless 4690 // Add a .*? at the beginning, outside the body capture, unless
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
4713 EmbeddedVector<byte, 1024> codes; 4737 EmbeddedVector<byte, 1024> codes;
4714 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4738 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4715 return compiler.Assemble(&macro_assembler, 4739 return compiler.Assemble(&macro_assembler,
4716 node, 4740 node,
4717 data->capture_count, 4741 data->capture_count,
4718 pattern); 4742 pattern);
4719 } 4743 }
4720 4744
4721 4745
4722 }} // namespace v8::internal 4746 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698